|
{ |
|
"best_global_step": 326, |
|
"best_metric": 1.5841516256332397, |
|
"best_model_checkpoint": "results_t5base/checkpoint-326", |
|
"epoch": 2.0, |
|
"eval_steps": 200, |
|
"global_step": 326, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06134969325153374, |
|
"grad_norm": 116.47437286376953, |
|
"learning_rate": 1.8e-05, |
|
"loss": 15.8526, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12269938650306748, |
|
"grad_norm": 36.57353591918945, |
|
"learning_rate": 3.8e-05, |
|
"loss": 11.7488, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18404907975460122, |
|
"grad_norm": 32.9762077331543, |
|
"learning_rate": 5.800000000000001e-05, |
|
"loss": 10.3287, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24539877300613497, |
|
"grad_norm": 23.83042335510254, |
|
"learning_rate": 7.8e-05, |
|
"loss": 6.4168, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3067484662576687, |
|
"grad_norm": 4.865868091583252, |
|
"learning_rate": 9.800000000000001e-05, |
|
"loss": 3.5254, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36809815950920244, |
|
"grad_norm": 12.269708633422852, |
|
"learning_rate": 0.000118, |
|
"loss": 3.0389, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4294478527607362, |
|
"grad_norm": 7.369626998901367, |
|
"learning_rate": 0.00013800000000000002, |
|
"loss": 2.5275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49079754601226994, |
|
"grad_norm": 2.0186314582824707, |
|
"learning_rate": 0.000158, |
|
"loss": 2.1442, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5521472392638037, |
|
"grad_norm": 1.231919527053833, |
|
"learning_rate": 0.000178, |
|
"loss": 1.8347, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6134969325153374, |
|
"grad_norm": 1.3266997337341309, |
|
"learning_rate": 0.00019800000000000002, |
|
"loss": 1.7494, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6748466257668712, |
|
"grad_norm": 1.498828649520874, |
|
"learning_rate": 0.000218, |
|
"loss": 1.8056, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7361963190184049, |
|
"grad_norm": 0.9476116299629211, |
|
"learning_rate": 0.00023799999999999998, |
|
"loss": 1.7621, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7975460122699386, |
|
"grad_norm": 1.154313564300537, |
|
"learning_rate": 0.00025800000000000004, |
|
"loss": 1.7556, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8588957055214724, |
|
"grad_norm": 0.7778469920158386, |
|
"learning_rate": 0.00027800000000000004, |
|
"loss": 1.7215, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9202453987730062, |
|
"grad_norm": 0.8308514356613159, |
|
"learning_rate": 0.000298, |
|
"loss": 1.6761, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9815950920245399, |
|
"grad_norm": 1.156718373298645, |
|
"learning_rate": 0.00031800000000000003, |
|
"loss": 1.6865, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.5963470935821533, |
|
"eval_rouge1": 0.6129, |
|
"eval_rouge2": 0.2686, |
|
"eval_rougeL": 0.4794, |
|
"eval_runtime": 30.837, |
|
"eval_samples_per_second": 8.885, |
|
"eval_steps_per_second": 1.135, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.0429447852760736, |
|
"grad_norm": 0.8923042416572571, |
|
"learning_rate": 0.00033800000000000003, |
|
"loss": 1.6244, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1042944785276074, |
|
"grad_norm": 1.0038191080093384, |
|
"learning_rate": 0.000358, |
|
"loss": 1.6075, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.165644171779141, |
|
"grad_norm": 0.9305262565612793, |
|
"learning_rate": 0.000378, |
|
"loss": 1.6722, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2269938650306749, |
|
"grad_norm": 1.2464691400527954, |
|
"learning_rate": 0.000398, |
|
"loss": 1.5723, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2883435582822087, |
|
"grad_norm": 1.017632246017456, |
|
"learning_rate": 0.00041799999999999997, |
|
"loss": 1.5711, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.3496932515337423, |
|
"grad_norm": 1.2981927394866943, |
|
"learning_rate": 0.000438, |
|
"loss": 1.5017, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4110429447852761, |
|
"grad_norm": 0.7919584512710571, |
|
"learning_rate": 0.000458, |
|
"loss": 1.5671, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4723926380368098, |
|
"grad_norm": 1.0223811864852905, |
|
"learning_rate": 0.00047799999999999996, |
|
"loss": 1.5162, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5337423312883436, |
|
"grad_norm": 0.8784969449043274, |
|
"learning_rate": 0.000498, |
|
"loss": 1.5689, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5950920245398774, |
|
"grad_norm": 0.8144865036010742, |
|
"learning_rate": 0.000518, |
|
"loss": 1.6243, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.656441717791411, |
|
"grad_norm": 0.846225380897522, |
|
"learning_rate": 0.0005380000000000001, |
|
"loss": 1.5596, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7177914110429446, |
|
"grad_norm": 0.9448590278625488, |
|
"learning_rate": 0.000558, |
|
"loss": 1.399, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7791411042944785, |
|
"grad_norm": 0.9131848812103271, |
|
"learning_rate": 0.000578, |
|
"loss": 1.6336, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.8404907975460123, |
|
"grad_norm": 0.8644697070121765, |
|
"learning_rate": 0.000598, |
|
"loss": 1.6112, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9018404907975461, |
|
"grad_norm": 0.964495062828064, |
|
"learning_rate": 0.0006180000000000001, |
|
"loss": 1.5901, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.9631901840490797, |
|
"grad_norm": 0.8454228043556213, |
|
"learning_rate": 0.000638, |
|
"loss": 1.609, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.5841516256332397, |
|
"eval_rouge1": 0.6218, |
|
"eval_rouge2": 0.2773, |
|
"eval_rougeL": 0.4814, |
|
"eval_runtime": 30.1539, |
|
"eval_samples_per_second": 9.087, |
|
"eval_steps_per_second": 1.161, |
|
"step": 326 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 815, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1780369116364800.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|