{ "best_global_step": 326, "best_metric": 1.5841516256332397, "best_model_checkpoint": "results_t5base/checkpoint-326", "epoch": 2.0, "eval_steps": 200, "global_step": 326, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06134969325153374, "grad_norm": 116.47437286376953, "learning_rate": 1.8e-05, "loss": 15.8526, "step": 10 }, { "epoch": 0.12269938650306748, "grad_norm": 36.57353591918945, "learning_rate": 3.8e-05, "loss": 11.7488, "step": 20 }, { "epoch": 0.18404907975460122, "grad_norm": 32.9762077331543, "learning_rate": 5.800000000000001e-05, "loss": 10.3287, "step": 30 }, { "epoch": 0.24539877300613497, "grad_norm": 23.83042335510254, "learning_rate": 7.8e-05, "loss": 6.4168, "step": 40 }, { "epoch": 0.3067484662576687, "grad_norm": 4.865868091583252, "learning_rate": 9.800000000000001e-05, "loss": 3.5254, "step": 50 }, { "epoch": 0.36809815950920244, "grad_norm": 12.269708633422852, "learning_rate": 0.000118, "loss": 3.0389, "step": 60 }, { "epoch": 0.4294478527607362, "grad_norm": 7.369626998901367, "learning_rate": 0.00013800000000000002, "loss": 2.5275, "step": 70 }, { "epoch": 0.49079754601226994, "grad_norm": 2.0186314582824707, "learning_rate": 0.000158, "loss": 2.1442, "step": 80 }, { "epoch": 0.5521472392638037, "grad_norm": 1.231919527053833, "learning_rate": 0.000178, "loss": 1.8347, "step": 90 }, { "epoch": 0.6134969325153374, "grad_norm": 1.3266997337341309, "learning_rate": 0.00019800000000000002, "loss": 1.7494, "step": 100 }, { "epoch": 0.6748466257668712, "grad_norm": 1.498828649520874, "learning_rate": 0.000218, "loss": 1.8056, "step": 110 }, { "epoch": 0.7361963190184049, "grad_norm": 0.9476116299629211, "learning_rate": 0.00023799999999999998, "loss": 1.7621, "step": 120 }, { "epoch": 0.7975460122699386, "grad_norm": 1.154313564300537, "learning_rate": 0.00025800000000000004, "loss": 1.7556, "step": 130 }, { "epoch": 0.8588957055214724, "grad_norm": 0.7778469920158386, "learning_rate": 0.00027800000000000004, "loss": 1.7215, "step": 140 }, { "epoch": 0.9202453987730062, "grad_norm": 0.8308514356613159, "learning_rate": 0.000298, "loss": 1.6761, "step": 150 }, { "epoch": 0.9815950920245399, "grad_norm": 1.156718373298645, "learning_rate": 0.00031800000000000003, "loss": 1.6865, "step": 160 }, { "epoch": 1.0, "eval_gen_len": 187.0146, "eval_loss": 1.5963470935821533, "eval_rouge1": 0.6129, "eval_rouge2": 0.2686, "eval_rougeL": 0.4794, "eval_runtime": 30.837, "eval_samples_per_second": 8.885, "eval_steps_per_second": 1.135, "step": 163 }, { "epoch": 1.0429447852760736, "grad_norm": 0.8923042416572571, "learning_rate": 0.00033800000000000003, "loss": 1.6244, "step": 170 }, { "epoch": 1.1042944785276074, "grad_norm": 1.0038191080093384, "learning_rate": 0.000358, "loss": 1.6075, "step": 180 }, { "epoch": 1.165644171779141, "grad_norm": 0.9305262565612793, "learning_rate": 0.000378, "loss": 1.6722, "step": 190 }, { "epoch": 1.2269938650306749, "grad_norm": 1.2464691400527954, "learning_rate": 0.000398, "loss": 1.5723, "step": 200 }, { "epoch": 1.2883435582822087, "grad_norm": 1.017632246017456, "learning_rate": 0.00041799999999999997, "loss": 1.5711, "step": 210 }, { "epoch": 1.3496932515337423, "grad_norm": 1.2981927394866943, "learning_rate": 0.000438, "loss": 1.5017, "step": 220 }, { "epoch": 1.4110429447852761, "grad_norm": 0.7919584512710571, "learning_rate": 0.000458, "loss": 1.5671, "step": 230 }, { "epoch": 1.4723926380368098, "grad_norm": 1.0223811864852905, "learning_rate": 
0.00047799999999999996, "loss": 1.5162, "step": 240 }, { "epoch": 1.5337423312883436, "grad_norm": 0.8784969449043274, "learning_rate": 0.000498, "loss": 1.5689, "step": 250 }, { "epoch": 1.5950920245398774, "grad_norm": 0.8144865036010742, "learning_rate": 0.000518, "loss": 1.6243, "step": 260 }, { "epoch": 1.656441717791411, "grad_norm": 0.846225380897522, "learning_rate": 0.0005380000000000001, "loss": 1.5596, "step": 270 }, { "epoch": 1.7177914110429446, "grad_norm": 0.9448590278625488, "learning_rate": 0.000558, "loss": 1.399, "step": 280 }, { "epoch": 1.7791411042944785, "grad_norm": 0.9131848812103271, "learning_rate": 0.000578, "loss": 1.6336, "step": 290 }, { "epoch": 1.8404907975460123, "grad_norm": 0.8644697070121765, "learning_rate": 0.000598, "loss": 1.6112, "step": 300 }, { "epoch": 1.9018404907975461, "grad_norm": 0.964495062828064, "learning_rate": 0.0006180000000000001, "loss": 1.5901, "step": 310 }, { "epoch": 1.9631901840490797, "grad_norm": 0.8454228043556213, "learning_rate": 0.000638, "loss": 1.609, "step": 320 }, { "epoch": 2.0, "eval_gen_len": 187.0146, "eval_loss": 1.5841516256332397, "eval_rouge1": 0.6218, "eval_rouge2": 0.2773, "eval_rougeL": 0.4814, "eval_runtime": 30.1539, "eval_samples_per_second": 9.087, "eval_steps_per_second": 1.161, "step": 326 } ], "logging_steps": 10, "max_steps": 815, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1780369116364800.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }