|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9952755905511811, |
|
"eval_steps": 500, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012598425196850394, |
|
"grad_norm": 1.130629539489746, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.6965, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.025196850393700787, |
|
"grad_norm": 1.1764330863952637, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.7042, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.050393700787401574, |
|
"grad_norm": 0.18122103810310364, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.5295, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07559055118110236, |
|
"grad_norm": 0.4274924695491791, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.3971, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.10078740157480315, |
|
"grad_norm": 0.7392313480377197, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5099, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.12598425196850394, |
|
"grad_norm": 0.29404416680336, |
|
"learning_rate": 6.25e-05, |
|
"loss": 1.4144, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15118110236220472, |
|
"grad_norm": 0.18568824231624603, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.3341, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.1763779527559055, |
|
"grad_norm": 0.16120545566082, |
|
"learning_rate": 8.75e-05, |
|
"loss": 1.3011, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.2015748031496063, |
|
"grad_norm": 0.09310784935951233, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2458, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22677165354330708, |
|
"grad_norm": 0.07340509444475174, |
|
"learning_rate": 9.998903417374228e-05, |
|
"loss": 1.215, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.25196850393700787, |
|
"grad_norm": 0.06960318982601166, |
|
"learning_rate": 9.995614150494293e-05, |
|
"loss": 1.2052, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27716535433070866, |
|
"grad_norm": 0.058640073984861374, |
|
"learning_rate": 9.990133642141359e-05, |
|
"loss": 1.1913, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.30236220472440944, |
|
"grad_norm": 0.04140308499336243, |
|
"learning_rate": 9.982464296247522e-05, |
|
"loss": 1.1544, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32755905511811023, |
|
"grad_norm": 0.05067910999059677, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 1.146, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.352755905511811, |
|
"grad_norm": 0.03361475095152855, |
|
"learning_rate": 9.96057350657239e-05, |
|
"loss": 1.1136, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3779527559055118, |
|
"grad_norm": 0.029164660722017288, |
|
"learning_rate": 9.946361664814943e-05, |
|
"loss": 1.1128, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4031496062992126, |
|
"grad_norm": 0.03163473680615425, |
|
"learning_rate": 9.929980185352526e-05, |
|
"loss": 1.0801, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.4283464566929134, |
|
"grad_norm": 0.02621687948703766, |
|
"learning_rate": 9.911436253643445e-05, |
|
"loss": 1.0853, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.45354330708661417, |
|
"grad_norm": 0.02314259298145771, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 1.0942, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.47874015748031495, |
|
"grad_norm": 0.027407709509134293, |
|
"learning_rate": 9.867894514365802e-05, |
|
"loss": 1.074, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5039370078740157, |
|
"grad_norm": 0.02250426821410656, |
|
"learning_rate": 9.842915805643155e-05, |
|
"loss": 1.0547, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5291338582677165, |
|
"grad_norm": 0.023746158927679062, |
|
"learning_rate": 9.815812833988291e-05, |
|
"loss": 1.0583, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5543307086614173, |
|
"grad_norm": 0.020694848150014877, |
|
"learning_rate": 9.786597487660337e-05, |
|
"loss": 1.0374, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5795275590551181, |
|
"grad_norm": 0.021605506539344788, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.0561, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6047244094488189, |
|
"grad_norm": 0.08317892998456955, |
|
"learning_rate": 9.721881851187406e-05, |
|
"loss": 1.0554, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6299212598425197, |
|
"grad_norm": 0.026709511876106262, |
|
"learning_rate": 9.686409947459458e-05, |
|
"loss": 1.049, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6551181102362205, |
|
"grad_norm": 0.022177977487444878, |
|
"learning_rate": 9.648882429441257e-05, |
|
"loss": 1.0254, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6803149606299213, |
|
"grad_norm": 0.05779964104294777, |
|
"learning_rate": 9.609315757942503e-05, |
|
"loss": 1.0084, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.705511811023622, |
|
"grad_norm": 0.03699235990643501, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 1.0201, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7307086614173228, |
|
"grad_norm": 0.02411213330924511, |
|
"learning_rate": 9.524135262330098e-05, |
|
"loss": 1.0278, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7559055118110236, |
|
"grad_norm": 0.030064478516578674, |
|
"learning_rate": 9.478558801197065e-05, |
|
"loss": 1.0288, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7811023622047244, |
|
"grad_norm": 0.023079903796315193, |
|
"learning_rate": 9.431017896156074e-05, |
|
"loss": 1.0174, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.8062992125984252, |
|
"grad_norm": 0.025795839726924896, |
|
"learning_rate": 9.381533400219318e-05, |
|
"loss": 1.0124, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.831496062992126, |
|
"grad_norm": 0.022699084132909775, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 1.0029, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.8566929133858268, |
|
"grad_norm": 0.020135775208473206, |
|
"learning_rate": 9.276821300802534e-05, |
|
"loss": 1.0141, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.8818897637795275, |
|
"grad_norm": 0.022621216252446175, |
|
"learning_rate": 9.221639627510076e-05, |
|
"loss": 1.0075, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9070866141732283, |
|
"grad_norm": 0.020864592865109444, |
|
"learning_rate": 9.164606203550497e-05, |
|
"loss": 1.0161, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.9322834645669291, |
|
"grad_norm": 0.02512463368475437, |
|
"learning_rate": 9.105746045668521e-05, |
|
"loss": 1.0012, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.9574803149606299, |
|
"grad_norm": 0.019894642755389214, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.9899, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.9826771653543307, |
|
"grad_norm": 0.025451743975281715, |
|
"learning_rate": 8.982649590120982e-05, |
|
"loss": 0.9971, |
|
"step": 78 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 316, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 206778516111360.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|