{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 107670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.953561809231913e-05, "loss": 1.9585, "step": 1000 }, { "epoch": 0.06, "learning_rate": 4.9071236184638245e-05, "loss": 1.9147, "step": 2000 }, { "epoch": 0.08, "learning_rate": 4.860685427695737e-05, "loss": 1.8999, "step": 3000 }, { "epoch": 0.11, "learning_rate": 4.81424723692765e-05, "loss": 1.8889, "step": 4000 }, { "epoch": 0.14, "learning_rate": 4.767809046159562e-05, "loss": 1.8914, "step": 5000 }, { "epoch": 0.17, "learning_rate": 4.721370855391474e-05, "loss": 1.8724, "step": 6000 }, { "epoch": 0.2, "learning_rate": 4.674932664623387e-05, "loss": 1.8677, "step": 7000 }, { "epoch": 0.22, "learning_rate": 4.6284944738552985e-05, "loss": 1.8652, "step": 8000 }, { "epoch": 0.25, "learning_rate": 4.582056283087211e-05, "loss": 1.8709, "step": 9000 }, { "epoch": 0.28, "learning_rate": 4.5356180923191235e-05, "loss": 1.8435, "step": 10000 }, { "epoch": 0.28, "eval_gen_len": 18.99970077797726, "eval_loss": 1.6998443603515625, "eval_rouge1": 24.3321, "eval_rouge2": 11.599, "eval_rougeL": 20.1028, "eval_rougeLsum": 22.9562, "eval_runtime": 1013.7965, "eval_samples_per_second": 13.186, "eval_steps_per_second": 1.648, "step": 10000 }, { "epoch": 0.31, "learning_rate": 4.489179901551035e-05, "loss": 1.8587, "step": 11000 }, { "epoch": 0.33, "learning_rate": 4.4427417107829484e-05, "loss": 1.8536, "step": 12000 }, { "epoch": 0.36, "learning_rate": 4.396303520014861e-05, "loss": 1.8534, "step": 13000 }, { "epoch": 0.39, "learning_rate": 4.3498653292467726e-05, "loss": 1.8607, "step": 14000 }, { "epoch": 0.42, "learning_rate": 4.303427138478685e-05, "loss": 1.8586, "step": 15000 }, { "epoch": 0.45, "learning_rate": 4.2569889477105975e-05, "loss": 1.8486, "step": 16000 }, { "epoch": 0.47, "learning_rate": 4.210550756942509e-05, "loss": 1.8581, "step": 17000 }, { "epoch": 0.5, "learning_rate": 4.164112566174422e-05, "loss": 1.8484, "step": 18000 }, { "epoch": 0.53, "learning_rate": 4.117674375406335e-05, "loss": 1.8494, "step": 19000 }, { "epoch": 0.56, "learning_rate": 4.071236184638247e-05, "loss": 1.8464, "step": 20000 }, { "epoch": 0.56, "eval_gen_len": 18.999625972471573, "eval_loss": 1.6814035177230835, "eval_rouge1": 24.4483, "eval_rouge2": 11.6789, "eval_rougeL": 20.1798, "eval_rougeLsum": 23.0508, "eval_runtime": 1020.3814, "eval_samples_per_second": 13.101, "eval_steps_per_second": 1.638, "step": 20000 }, { "epoch": 0.59, "learning_rate": 4.024797993870159e-05, "loss": 1.8538, "step": 21000 }, { "epoch": 0.61, "learning_rate": 3.9783598031020716e-05, "loss": 1.8436, "step": 22000 }, { "epoch": 0.64, "learning_rate": 3.9319216123339834e-05, "loss": 1.8433, "step": 23000 }, { "epoch": 0.67, "learning_rate": 3.885483421565896e-05, "loss": 1.8513, "step": 24000 }, { "epoch": 0.7, "learning_rate": 3.839045230797808e-05, "loss": 1.8418, "step": 25000 }, { "epoch": 0.72, "learning_rate": 3.792607040029721e-05, "loss": 1.839, "step": 26000 }, { "epoch": 0.75, "learning_rate": 3.746168849261633e-05, "loss": 1.8525, "step": 27000 }, { "epoch": 0.78, "learning_rate": 3.6997306584935456e-05, "loss": 1.8191, "step": 28000 }, { "epoch": 0.81, "learning_rate": 3.6532924677254574e-05, "loss": 1.8209, "step": 29000 }, { "epoch": 0.84, "learning_rate": 3.60685427695737e-05, "loss": 1.8332, "step": 30000 }, { "epoch": 0.84, "eval_gen_len": 18.999401555954517, "eval_loss": 1.6737741231918335, "eval_rouge1": 24.5531, "eval_rouge2": 11.7949, "eval_rougeL": 20.2834, "eval_rougeLsum": 23.1588, "eval_runtime": 1031.0784, "eval_samples_per_second": 12.965, "eval_steps_per_second": 1.621, "step": 30000 }, { "epoch": 0.86, "learning_rate": 3.560416086189282e-05, "loss": 1.8397, "step": 31000 }, { "epoch": 0.89, "learning_rate": 3.513977895421194e-05, "loss": 1.8483, "step": 32000 }, { "epoch": 0.92, "learning_rate": 3.4675397046531066e-05, "loss": 1.8288, "step": 33000 }, { "epoch": 0.95, "learning_rate": 3.42110151388502e-05, "loss": 1.8258, "step": 34000 }, { "epoch": 0.98, "learning_rate": 3.3746633231169315e-05, "loss": 1.8288, "step": 35000 }, { "epoch": 1.0, "learning_rate": 3.328225132348844e-05, "loss": 1.8425, "step": 36000 }, { "epoch": 1.03, "learning_rate": 3.2817869415807564e-05, "loss": 1.8162, "step": 37000 }, { "epoch": 1.06, "learning_rate": 3.235348750812668e-05, "loss": 1.8145, "step": 38000 }, { "epoch": 1.09, "learning_rate": 3.1889105600445806e-05, "loss": 1.8169, "step": 39000 }, { "epoch": 1.11, "learning_rate": 3.142472369276493e-05, "loss": 1.8054, "step": 40000 }, { "epoch": 1.11, "eval_gen_len": 18.999102333931777, "eval_loss": 1.6636024713516235, "eval_rouge1": 24.6194, "eval_rouge2": 11.843, "eval_rougeL": 20.3375, "eval_rougeLsum": 23.2259, "eval_runtime": 1028.3007, "eval_samples_per_second": 13.0, "eval_steps_per_second": 1.625, "step": 40000 }, { "epoch": 1.14, "learning_rate": 3.0960341785084055e-05, "loss": 1.8084, "step": 41000 }, { "epoch": 1.17, "learning_rate": 3.049595987740318e-05, "loss": 1.8025, "step": 42000 }, { "epoch": 1.2, "learning_rate": 3.00315779697223e-05, "loss": 1.8277, "step": 43000 }, { "epoch": 1.23, "learning_rate": 2.9567196062041426e-05, "loss": 1.8109, "step": 44000 }, { "epoch": 1.25, "learning_rate": 2.9102814154360547e-05, "loss": 1.8148, "step": 45000 }, { "epoch": 1.28, "learning_rate": 2.8638432246679668e-05, "loss": 1.8002, "step": 46000 }, { "epoch": 1.31, "learning_rate": 2.8174050338998793e-05, "loss": 1.8094, "step": 47000 }, { "epoch": 1.34, "learning_rate": 2.7709668431317914e-05, "loss": 1.8049, "step": 48000 }, { "epoch": 1.37, "learning_rate": 2.7245286523637042e-05, "loss": 1.8071, "step": 49000 }, { "epoch": 1.39, "learning_rate": 2.6780904615956166e-05, "loss": 1.7958, "step": 50000 }, { "epoch": 1.39, "eval_gen_len": 18.999775583482943, "eval_loss": 1.6596847772598267, "eval_rouge1": 24.5017, "eval_rouge2": 11.7755, "eval_rougeL": 20.2439, "eval_rougeLsum": 23.1148, "eval_runtime": 1021.876, "eval_samples_per_second": 13.082, "eval_steps_per_second": 1.635, "step": 50000 }, { "epoch": 1.42, "learning_rate": 2.6316522708275288e-05, "loss": 1.8124, "step": 51000 }, { "epoch": 1.45, "learning_rate": 2.585214080059441e-05, "loss": 1.8052, "step": 52000 }, { "epoch": 1.48, "learning_rate": 2.5387758892913533e-05, "loss": 1.8118, "step": 53000 }, { "epoch": 1.5, "learning_rate": 2.4923376985232658e-05, "loss": 1.8137, "step": 54000 }, { "epoch": 1.53, "learning_rate": 2.445899507755178e-05, "loss": 1.804, "step": 55000 }, { "epoch": 1.56, "learning_rate": 2.3994613169870904e-05, "loss": 1.7957, "step": 56000 }, { "epoch": 1.59, "learning_rate": 2.3530231262190025e-05, "loss": 1.7922, "step": 57000 }, { "epoch": 1.62, "learning_rate": 2.306584935450915e-05, "loss": 1.7975, "step": 58000 }, { "epoch": 1.64, "learning_rate": 2.2601467446828274e-05, "loss": 1.8092, "step": 59000 }, { "epoch": 1.67, "learning_rate": 2.2137085539147395e-05, "loss": 1.8095, "step": 60000 }, { "epoch": 1.67, "eval_gen_len": 18.999925194494313, "eval_loss": 1.6546396017074585, "eval_rouge1": 24.5126, "eval_rouge2": 11.8043, "eval_rougeL": 20.2603, "eval_rougeLsum": 23.1175, "eval_runtime": 1022.4979, "eval_samples_per_second": 13.074, "eval_steps_per_second": 1.634, "step": 60000 }, { "epoch": 1.7, "learning_rate": 2.1672703631466516e-05, "loss": 1.8038, "step": 61000 }, { "epoch": 1.73, "learning_rate": 2.1208321723785644e-05, "loss": 1.7997, "step": 62000 }, { "epoch": 1.76, "learning_rate": 2.0743939816104765e-05, "loss": 1.7984, "step": 63000 }, { "epoch": 1.78, "learning_rate": 2.0279557908423887e-05, "loss": 1.8107, "step": 64000 }, { "epoch": 1.81, "learning_rate": 1.9815176000743015e-05, "loss": 1.8042, "step": 65000 }, { "epoch": 1.84, "learning_rate": 1.9350794093062136e-05, "loss": 1.8072, "step": 66000 }, { "epoch": 1.87, "learning_rate": 1.8886412185381257e-05, "loss": 1.7904, "step": 67000 }, { "epoch": 1.89, "learning_rate": 1.842203027770038e-05, "loss": 1.8062, "step": 68000 }, { "epoch": 1.92, "learning_rate": 1.7957648370019506e-05, "loss": 1.7869, "step": 69000 }, { "epoch": 1.95, "learning_rate": 1.7493266462338627e-05, "loss": 1.8127, "step": 70000 }, { "epoch": 1.95, "eval_gen_len": 18.99985038898863, "eval_loss": 1.6521377563476562, "eval_rouge1": 24.4845, "eval_rouge2": 11.8136, "eval_rougeL": 20.2557, "eval_rougeLsum": 23.1089, "eval_runtime": 1036.9273, "eval_samples_per_second": 12.892, "eval_steps_per_second": 1.611, "step": 70000 }, { "epoch": 1.98, "learning_rate": 1.7028884554657752e-05, "loss": 1.7962, "step": 71000 }, { "epoch": 2.01, "learning_rate": 1.6564502646976873e-05, "loss": 1.7881, "step": 72000 }, { "epoch": 2.03, "learning_rate": 1.6100120739295998e-05, "loss": 1.7922, "step": 73000 }, { "epoch": 2.06, "learning_rate": 1.5635738831615122e-05, "loss": 1.784, "step": 74000 }, { "epoch": 2.09, "learning_rate": 1.5171356923934243e-05, "loss": 1.7937, "step": 75000 }, { "epoch": 2.12, "learning_rate": 1.470697501625337e-05, "loss": 1.7902, "step": 76000 }, { "epoch": 2.15, "learning_rate": 1.424259310857249e-05, "loss": 1.7866, "step": 77000 }, { "epoch": 2.17, "learning_rate": 1.3778211200891614e-05, "loss": 1.7861, "step": 78000 }, { "epoch": 2.2, "learning_rate": 1.3313829293210736e-05, "loss": 1.7885, "step": 79000 }, { "epoch": 2.23, "learning_rate": 1.2849447385529861e-05, "loss": 1.7952, "step": 80000 }, { "epoch": 2.23, "eval_gen_len": 18.99955116696589, "eval_loss": 1.648803949356079, "eval_rouge1": 24.6217, "eval_rouge2": 11.8877, "eval_rougeL": 20.3555, "eval_rougeLsum": 23.2514, "eval_runtime": 1021.958, "eval_samples_per_second": 13.081, "eval_steps_per_second": 1.635, "step": 80000 }, { "epoch": 2.26, "learning_rate": 1.2385065477848984e-05, "loss": 1.7879, "step": 81000 }, { "epoch": 2.28, "learning_rate": 1.1920683570168107e-05, "loss": 1.7827, "step": 82000 }, { "epoch": 2.31, "learning_rate": 1.145630166248723e-05, "loss": 1.7984, "step": 83000 }, { "epoch": 2.34, "learning_rate": 1.0991919754806353e-05, "loss": 1.7778, "step": 84000 }, { "epoch": 2.37, "learning_rate": 1.0527537847125477e-05, "loss": 1.7784, "step": 85000 }, { "epoch": 2.4, "learning_rate": 1.00631559394446e-05, "loss": 1.7925, "step": 86000 }, { "epoch": 2.42, "learning_rate": 9.598774031763723e-06, "loss": 1.7724, "step": 87000 }, { "epoch": 2.45, "learning_rate": 9.134392124082847e-06, "loss": 1.7759, "step": 88000 }, { "epoch": 2.48, "learning_rate": 8.670010216401969e-06, "loss": 1.7764, "step": 89000 }, { "epoch": 2.51, "learning_rate": 8.205628308721093e-06, "loss": 1.7863, "step": 90000 }, { "epoch": 2.51, "eval_gen_len": 18.999625972471573, "eval_loss": 1.6477200984954834, "eval_rouge1": 24.5616, "eval_rouge2": 11.8489, "eval_rougeL": 20.3021, "eval_rougeLsum": 23.1754, "eval_runtime": 1025.9829, "eval_samples_per_second": 13.029, "eval_steps_per_second": 1.629, "step": 90000 }, { "epoch": 2.54, "learning_rate": 7.741246401040216e-06, "loss": 1.7904, "step": 91000 }, { "epoch": 2.56, "learning_rate": 7.27686449335934e-06, "loss": 1.7885, "step": 92000 }, { "epoch": 2.59, "learning_rate": 6.812482585678462e-06, "loss": 1.7925, "step": 93000 }, { "epoch": 2.62, "learning_rate": 6.3481006779975855e-06, "loss": 1.7768, "step": 94000 }, { "epoch": 2.65, "learning_rate": 5.883718770316709e-06, "loss": 1.7815, "step": 95000 }, { "epoch": 2.67, "learning_rate": 5.419336862635832e-06, "loss": 1.7835, "step": 96000 }, { "epoch": 2.7, "learning_rate": 4.954954954954955e-06, "loss": 1.7907, "step": 97000 }, { "epoch": 2.73, "learning_rate": 4.490573047274079e-06, "loss": 1.8067, "step": 98000 }, { "epoch": 2.76, "learning_rate": 4.0261911395932016e-06, "loss": 1.7848, "step": 99000 }, { "epoch": 2.79, "learning_rate": 3.561809231912325e-06, "loss": 1.7824, "step": 100000 }, { "epoch": 2.79, "eval_gen_len": 18.999775583482943, "eval_loss": 1.6464020013809204, "eval_rouge1": 24.5852, "eval_rouge2": 11.8531, "eval_rougeL": 20.3172, "eval_rougeLsum": 23.2089, "eval_runtime": 1030.7018, "eval_samples_per_second": 12.97, "eval_steps_per_second": 1.621, "step": 100000 }, { "epoch": 2.81, "learning_rate": 3.097427324231448e-06, "loss": 1.7836, "step": 101000 }, { "epoch": 2.84, "learning_rate": 2.6330454165505714e-06, "loss": 1.7817, "step": 102000 }, { "epoch": 2.87, "learning_rate": 2.1686635088696947e-06, "loss": 1.7813, "step": 103000 }, { "epoch": 2.9, "learning_rate": 1.7042816011888178e-06, "loss": 1.7979, "step": 104000 }, { "epoch": 2.93, "learning_rate": 1.239899693507941e-06, "loss": 1.7842, "step": 105000 }, { "epoch": 2.95, "learning_rate": 7.755177858270642e-07, "loss": 1.7754, "step": 106000 }, { "epoch": 2.98, "learning_rate": 3.1113587814618745e-07, "loss": 1.7827, "step": 107000 }, { "epoch": 3.0, "step": 107670, "total_flos": 2.3279391489785856e+17, "train_loss": 1.815847429410322, "train_runtime": 60006.6565, "train_samples_per_second": 14.354, "train_steps_per_second": 1.794 } ], "max_steps": 107670, "num_train_epochs": 3, "total_flos": 2.3279391489785856e+17, "trial_name": null, "trial_params": null }