flan-t5-youtube-summarizer / trainer_state.json
bilal521's picture
Upload model files
23d4ff6 verified
{
"best_global_step": 326,
"best_metric": 1.5841516256332397,
"best_model_checkpoint": "results_t5base/checkpoint-326",
"epoch": 2.0,
"eval_steps": 200,
"global_step": 326,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06134969325153374,
"grad_norm": 116.47437286376953,
"learning_rate": 1.8e-05,
"loss": 15.8526,
"step": 10
},
{
"epoch": 0.12269938650306748,
"grad_norm": 36.57353591918945,
"learning_rate": 3.8e-05,
"loss": 11.7488,
"step": 20
},
{
"epoch": 0.18404907975460122,
"grad_norm": 32.9762077331543,
"learning_rate": 5.800000000000001e-05,
"loss": 10.3287,
"step": 30
},
{
"epoch": 0.24539877300613497,
"grad_norm": 23.83042335510254,
"learning_rate": 7.8e-05,
"loss": 6.4168,
"step": 40
},
{
"epoch": 0.3067484662576687,
"grad_norm": 4.865868091583252,
"learning_rate": 9.800000000000001e-05,
"loss": 3.5254,
"step": 50
},
{
"epoch": 0.36809815950920244,
"grad_norm": 12.269708633422852,
"learning_rate": 0.000118,
"loss": 3.0389,
"step": 60
},
{
"epoch": 0.4294478527607362,
"grad_norm": 7.369626998901367,
"learning_rate": 0.00013800000000000002,
"loss": 2.5275,
"step": 70
},
{
"epoch": 0.49079754601226994,
"grad_norm": 2.0186314582824707,
"learning_rate": 0.000158,
"loss": 2.1442,
"step": 80
},
{
"epoch": 0.5521472392638037,
"grad_norm": 1.231919527053833,
"learning_rate": 0.000178,
"loss": 1.8347,
"step": 90
},
{
"epoch": 0.6134969325153374,
"grad_norm": 1.3266997337341309,
"learning_rate": 0.00019800000000000002,
"loss": 1.7494,
"step": 100
},
{
"epoch": 0.6748466257668712,
"grad_norm": 1.498828649520874,
"learning_rate": 0.000218,
"loss": 1.8056,
"step": 110
},
{
"epoch": 0.7361963190184049,
"grad_norm": 0.9476116299629211,
"learning_rate": 0.00023799999999999998,
"loss": 1.7621,
"step": 120
},
{
"epoch": 0.7975460122699386,
"grad_norm": 1.154313564300537,
"learning_rate": 0.00025800000000000004,
"loss": 1.7556,
"step": 130
},
{
"epoch": 0.8588957055214724,
"grad_norm": 0.7778469920158386,
"learning_rate": 0.00027800000000000004,
"loss": 1.7215,
"step": 140
},
{
"epoch": 0.9202453987730062,
"grad_norm": 0.8308514356613159,
"learning_rate": 0.000298,
"loss": 1.6761,
"step": 150
},
{
"epoch": 0.9815950920245399,
"grad_norm": 1.156718373298645,
"learning_rate": 0.00031800000000000003,
"loss": 1.6865,
"step": 160
},
{
"epoch": 1.0,
"eval_gen_len": 187.0146,
"eval_loss": 1.5963470935821533,
"eval_rouge1": 0.6129,
"eval_rouge2": 0.2686,
"eval_rougeL": 0.4794,
"eval_runtime": 30.837,
"eval_samples_per_second": 8.885,
"eval_steps_per_second": 1.135,
"step": 163
},
{
"epoch": 1.0429447852760736,
"grad_norm": 0.8923042416572571,
"learning_rate": 0.00033800000000000003,
"loss": 1.6244,
"step": 170
},
{
"epoch": 1.1042944785276074,
"grad_norm": 1.0038191080093384,
"learning_rate": 0.000358,
"loss": 1.6075,
"step": 180
},
{
"epoch": 1.165644171779141,
"grad_norm": 0.9305262565612793,
"learning_rate": 0.000378,
"loss": 1.6722,
"step": 190
},
{
"epoch": 1.2269938650306749,
"grad_norm": 1.2464691400527954,
"learning_rate": 0.000398,
"loss": 1.5723,
"step": 200
},
{
"epoch": 1.2883435582822087,
"grad_norm": 1.017632246017456,
"learning_rate": 0.00041799999999999997,
"loss": 1.5711,
"step": 210
},
{
"epoch": 1.3496932515337423,
"grad_norm": 1.2981927394866943,
"learning_rate": 0.000438,
"loss": 1.5017,
"step": 220
},
{
"epoch": 1.4110429447852761,
"grad_norm": 0.7919584512710571,
"learning_rate": 0.000458,
"loss": 1.5671,
"step": 230
},
{
"epoch": 1.4723926380368098,
"grad_norm": 1.0223811864852905,
"learning_rate": 0.00047799999999999996,
"loss": 1.5162,
"step": 240
},
{
"epoch": 1.5337423312883436,
"grad_norm": 0.8784969449043274,
"learning_rate": 0.000498,
"loss": 1.5689,
"step": 250
},
{
"epoch": 1.5950920245398774,
"grad_norm": 0.8144865036010742,
"learning_rate": 0.000518,
"loss": 1.6243,
"step": 260
},
{
"epoch": 1.656441717791411,
"grad_norm": 0.846225380897522,
"learning_rate": 0.0005380000000000001,
"loss": 1.5596,
"step": 270
},
{
"epoch": 1.7177914110429446,
"grad_norm": 0.9448590278625488,
"learning_rate": 0.000558,
"loss": 1.399,
"step": 280
},
{
"epoch": 1.7791411042944785,
"grad_norm": 0.9131848812103271,
"learning_rate": 0.000578,
"loss": 1.6336,
"step": 290
},
{
"epoch": 1.8404907975460123,
"grad_norm": 0.8644697070121765,
"learning_rate": 0.000598,
"loss": 1.6112,
"step": 300
},
{
"epoch": 1.9018404907975461,
"grad_norm": 0.964495062828064,
"learning_rate": 0.0006180000000000001,
"loss": 1.5901,
"step": 310
},
{
"epoch": 1.9631901840490797,
"grad_norm": 0.8454228043556213,
"learning_rate": 0.000638,
"loss": 1.609,
"step": 320
},
{
"epoch": 2.0,
"eval_gen_len": 187.0146,
"eval_loss": 1.5841516256332397,
"eval_rouge1": 0.6218,
"eval_rouge2": 0.2773,
"eval_rougeL": 0.4814,
"eval_runtime": 30.1539,
"eval_samples_per_second": 9.087,
"eval_steps_per_second": 1.161,
"step": 326
}
],
"logging_steps": 10,
"max_steps": 815,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1780369116364800.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}