{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9952755905511811,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012598425196850394,
"grad_norm": 1.130629539489746,
"learning_rate": 6.25e-06,
"loss": 1.6965,
"step": 1
},
{
"epoch": 0.025196850393700787,
"grad_norm": 1.1764330863952637,
"learning_rate": 1.25e-05,
"loss": 1.7042,
"step": 2
},
{
"epoch": 0.050393700787401574,
"grad_norm": 0.18122103810310364,
"learning_rate": 2.5e-05,
"loss": 1.5295,
"step": 4
},
{
"epoch": 0.07559055118110236,
"grad_norm": 0.4274924695491791,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.3971,
"step": 6
},
{
"epoch": 0.10078740157480315,
"grad_norm": 0.7392313480377197,
"learning_rate": 5e-05,
"loss": 1.5099,
"step": 8
},
{
"epoch": 0.12598425196850394,
"grad_norm": 0.29404416680336,
"learning_rate": 6.25e-05,
"loss": 1.4144,
"step": 10
},
{
"epoch": 0.15118110236220472,
"grad_norm": 0.18568824231624603,
"learning_rate": 7.500000000000001e-05,
"loss": 1.3341,
"step": 12
},
{
"epoch": 0.1763779527559055,
"grad_norm": 0.16120545566082,
"learning_rate": 8.75e-05,
"loss": 1.3011,
"step": 14
},
{
"epoch": 0.2015748031496063,
"grad_norm": 0.09310784935951233,
"learning_rate": 0.0001,
"loss": 1.2458,
"step": 16
},
{
"epoch": 0.22677165354330708,
"grad_norm": 0.07340509444475174,
"learning_rate": 9.998903417374228e-05,
"loss": 1.215,
"step": 18
},
{
"epoch": 0.25196850393700787,
"grad_norm": 0.06960318982601166,
"learning_rate": 9.995614150494293e-05,
"loss": 1.2052,
"step": 20
},
{
"epoch": 0.27716535433070866,
"grad_norm": 0.058640073984861374,
"learning_rate": 9.990133642141359e-05,
"loss": 1.1913,
"step": 22
},
{
"epoch": 0.30236220472440944,
"grad_norm": 0.04140308499336243,
"learning_rate": 9.982464296247522e-05,
"loss": 1.1544,
"step": 24
},
{
"epoch": 0.32755905511811023,
"grad_norm": 0.05067910999059677,
"learning_rate": 9.972609476841367e-05,
"loss": 1.146,
"step": 26
},
{
"epoch": 0.352755905511811,
"grad_norm": 0.03361475095152855,
"learning_rate": 9.96057350657239e-05,
"loss": 1.1136,
"step": 28
},
{
"epoch": 0.3779527559055118,
"grad_norm": 0.029164660722017288,
"learning_rate": 9.946361664814943e-05,
"loss": 1.1128,
"step": 30
},
{
"epoch": 0.4031496062992126,
"grad_norm": 0.03163473680615425,
"learning_rate": 9.929980185352526e-05,
"loss": 1.0801,
"step": 32
},
{
"epoch": 0.4283464566929134,
"grad_norm": 0.02621687948703766,
"learning_rate": 9.911436253643445e-05,
"loss": 1.0853,
"step": 34
},
{
"epoch": 0.45354330708661417,
"grad_norm": 0.02314259298145771,
"learning_rate": 9.890738003669029e-05,
"loss": 1.0942,
"step": 36
},
{
"epoch": 0.47874015748031495,
"grad_norm": 0.027407709509134293,
"learning_rate": 9.867894514365802e-05,
"loss": 1.074,
"step": 38
},
{
"epoch": 0.5039370078740157,
"grad_norm": 0.02250426821410656,
"learning_rate": 9.842915805643155e-05,
"loss": 1.0547,
"step": 40
},
{
"epoch": 0.5291338582677165,
"grad_norm": 0.023746158927679062,
"learning_rate": 9.815812833988291e-05,
"loss": 1.0583,
"step": 42
},
{
"epoch": 0.5543307086614173,
"grad_norm": 0.020694848150014877,
"learning_rate": 9.786597487660337e-05,
"loss": 1.0374,
"step": 44
},
{
"epoch": 0.5795275590551181,
"grad_norm": 0.021605506539344788,
"learning_rate": 9.755282581475769e-05,
"loss": 1.0561,
"step": 46
},
{
"epoch": 0.6047244094488189,
"grad_norm": 0.08317892998456955,
"learning_rate": 9.721881851187406e-05,
"loss": 1.0554,
"step": 48
},
{
"epoch": 0.6299212598425197,
"grad_norm": 0.026709511876106262,
"learning_rate": 9.686409947459458e-05,
"loss": 1.049,
"step": 50
},
{
"epoch": 0.6551181102362205,
"grad_norm": 0.022177977487444878,
"learning_rate": 9.648882429441257e-05,
"loss": 1.0254,
"step": 52
},
{
"epoch": 0.6803149606299213,
"grad_norm": 0.05779964104294777,
"learning_rate": 9.609315757942503e-05,
"loss": 1.0084,
"step": 54
},
{
"epoch": 0.705511811023622,
"grad_norm": 0.03699235990643501,
"learning_rate": 9.567727288213005e-05,
"loss": 1.0201,
"step": 56
},
{
"epoch": 0.7307086614173228,
"grad_norm": 0.02411213330924511,
"learning_rate": 9.524135262330098e-05,
"loss": 1.0278,
"step": 58
},
{
"epoch": 0.7559055118110236,
"grad_norm": 0.030064478516578674,
"learning_rate": 9.478558801197065e-05,
"loss": 1.0288,
"step": 60
},
{
"epoch": 0.7811023622047244,
"grad_norm": 0.023079903796315193,
"learning_rate": 9.431017896156074e-05,
"loss": 1.0174,
"step": 62
},
{
"epoch": 0.8062992125984252,
"grad_norm": 0.025795839726924896,
"learning_rate": 9.381533400219318e-05,
"loss": 1.0124,
"step": 64
},
{
"epoch": 0.831496062992126,
"grad_norm": 0.022699084132909775,
"learning_rate": 9.330127018922194e-05,
"loss": 1.0029,
"step": 66
},
{
"epoch": 0.8566929133858268,
"grad_norm": 0.020135775208473206,
"learning_rate": 9.276821300802534e-05,
"loss": 1.0141,
"step": 68
},
{
"epoch": 0.8818897637795275,
"grad_norm": 0.022621216252446175,
"learning_rate": 9.221639627510076e-05,
"loss": 1.0075,
"step": 70
},
{
"epoch": 0.9070866141732283,
"grad_norm": 0.020864592865109444,
"learning_rate": 9.164606203550497e-05,
"loss": 1.0161,
"step": 72
},
{
"epoch": 0.9322834645669291,
"grad_norm": 0.02512463368475437,
"learning_rate": 9.105746045668521e-05,
"loss": 1.0012,
"step": 74
},
{
"epoch": 0.9574803149606299,
"grad_norm": 0.019894642755389214,
"learning_rate": 9.045084971874738e-05,
"loss": 0.9899,
"step": 76
},
{
"epoch": 0.9826771653543307,
"grad_norm": 0.025451743975281715,
"learning_rate": 8.982649590120982e-05,
"loss": 0.9971,
"step": 78
}
],
"logging_steps": 2,
"max_steps": 316,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 206778516111360.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}