{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99777942264989, "global_step": 16850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 1.7639, "eval_gen_len": 17.0476, "eval_loss": 2.9241816997528076, "eval_runtime": 161.7821, "eval_samples_per_second": 34.93, "eval_steps_per_second": 1.749, "step": 337 }, { "epoch": 1.48, "learning_rate": 4.851632047477745e-05, "loss": 3.1098, "step": 500 }, { "epoch": 2.0, "eval_bleu": 1.7951, "eval_gen_len": 15.622, "eval_loss": 2.894585609436035, "eval_runtime": 158.8854, "eval_samples_per_second": 35.567, "eval_steps_per_second": 1.781, "step": 674 }, { "epoch": 2.97, "learning_rate": 4.70326409495549e-05, "loss": 2.8138, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 1.9954, "eval_gen_len": 16.75, "eval_loss": 2.8898093700408936, "eval_runtime": 161.5184, "eval_samples_per_second": 34.987, "eval_steps_per_second": 1.752, "step": 1011 }, { "epoch": 4.0, "eval_bleu": 1.9243, "eval_gen_len": 16.4229, "eval_loss": 2.903644323348999, "eval_runtime": 160.1198, "eval_samples_per_second": 35.292, "eval_steps_per_second": 1.767, "step": 1348 }, { "epoch": 4.45, "learning_rate": 4.554896142433235e-05, "loss": 2.594, "step": 1500 }, { "epoch": 5.0, "eval_bleu": 1.9376, "eval_gen_len": 16.8443, "eval_loss": 2.9253323078155518, "eval_runtime": 159.7166, "eval_samples_per_second": 35.381, "eval_steps_per_second": 1.772, "step": 1685 }, { "epoch": 5.93, "learning_rate": 4.4065281899109794e-05, "loss": 2.4374, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 2.0055, "eval_gen_len": 16.2769, "eval_loss": 2.9406914710998535, "eval_runtime": 159.0512, "eval_samples_per_second": 35.529, "eval_steps_per_second": 1.779, "step": 2022 }, { "epoch": 7.0, "eval_bleu": 1.9174, "eval_gen_len": 15.7742, "eval_loss": 2.978670358657837, "eval_runtime": 158.3136, "eval_samples_per_second": 35.695, "eval_steps_per_second": 1.788, "step": 2359 }, { "epoch": 7.42, "learning_rate": 4.258160237388724e-05, "loss": 2.2623, "step": 2500 }, { "epoch": 8.0, "eval_bleu": 1.9378, "eval_gen_len": 15.8482, "eval_loss": 3.0429296493530273, "eval_runtime": 157.9591, "eval_samples_per_second": 35.775, "eval_steps_per_second": 1.792, "step": 2696 }, { "epoch": 8.9, "learning_rate": 4.109792284866469e-05, "loss": 2.1323, "step": 3000 }, { "epoch": 9.0, "eval_bleu": 1.9663, "eval_gen_len": 16.4675, "eval_loss": 3.058178663253784, "eval_runtime": 158.2119, "eval_samples_per_second": 35.718, "eval_steps_per_second": 1.789, "step": 3033 }, { "epoch": 10.0, "eval_bleu": 1.9412, "eval_gen_len": 16.0646, "eval_loss": 3.107869863510132, "eval_runtime": 157.8461, "eval_samples_per_second": 35.801, "eval_steps_per_second": 1.793, "step": 3370 }, { "epoch": 10.38, "learning_rate": 3.961424332344214e-05, "loss": 1.9817, "step": 3500 }, { "epoch": 11.0, "eval_bleu": 2.0972, "eval_gen_len": 16.4849, "eval_loss": 3.1683783531188965, "eval_runtime": 158.1876, "eval_samples_per_second": 35.723, "eval_steps_per_second": 1.789, "step": 3707 }, { "epoch": 11.87, "learning_rate": 3.8130563798219586e-05, "loss": 1.864, "step": 4000 }, { "epoch": 12.0, "eval_bleu": 1.8631, "eval_gen_len": 16.3084, "eval_loss": 3.224560022354126, "eval_runtime": 158.1833, "eval_samples_per_second": 35.724, "eval_steps_per_second": 1.789, "step": 4044 }, { "epoch": 13.0, "eval_bleu": 1.9812, "eval_gen_len": 16.2702, "eval_loss": 3.256418228149414, "eval_runtime": 157.9844, "eval_samples_per_second": 35.769, "eval_steps_per_second": 1.791, "step": 4381 }, { "epoch": 13.35, "learning_rate": 3.664688427299703e-05, "loss": 1.7319, "step": 4500 }, { "epoch": 14.0, "eval_bleu": 1.8518, "eval_gen_len": 16.6208, "eval_loss": 3.321943759918213, "eval_runtime": 158.399, "eval_samples_per_second": 35.676, "eval_steps_per_second": 1.787, "step": 4718 }, { "epoch": 14.83, "learning_rate": 3.516320474777448e-05, "loss": 1.6291, "step": 5000 }, { "epoch": 15.0, "eval_bleu": 1.8638, "eval_gen_len": 15.8876, "eval_loss": 3.3599555492401123, "eval_runtime": 157.6851, "eval_samples_per_second": 35.837, "eval_steps_per_second": 1.795, "step": 5055 }, { "epoch": 16.0, "eval_bleu": 1.9703, "eval_gen_len": 16.2079, "eval_loss": 3.4334099292755127, "eval_runtime": 157.9443, "eval_samples_per_second": 35.778, "eval_steps_per_second": 1.792, "step": 5392 }, { "epoch": 16.32, "learning_rate": 3.3679525222551934e-05, "loss": 1.5185, "step": 5500 }, { "epoch": 17.0, "eval_bleu": 1.67, "eval_gen_len": 15.4355, "eval_loss": 3.4718282222747803, "eval_runtime": 157.5031, "eval_samples_per_second": 35.879, "eval_steps_per_second": 1.797, "step": 5729 }, { "epoch": 17.8, "learning_rate": 3.219584569732938e-05, "loss": 1.423, "step": 6000 }, { "epoch": 18.0, "eval_bleu": 1.834, "eval_gen_len": 16.5098, "eval_loss": 3.5062952041625977, "eval_runtime": 158.3097, "eval_samples_per_second": 35.696, "eval_steps_per_second": 1.788, "step": 6066 }, { "epoch": 19.0, "eval_bleu": 1.6657, "eval_gen_len": 16.0078, "eval_loss": 3.5681209564208984, "eval_runtime": 157.9659, "eval_samples_per_second": 35.774, "eval_steps_per_second": 1.792, "step": 6403 }, { "epoch": 19.29, "learning_rate": 3.071216617210683e-05, "loss": 1.3274, "step": 6500 }, { "epoch": 20.0, "eval_bleu": 1.9141, "eval_gen_len": 16.8841, "eval_loss": 3.612222194671631, "eval_runtime": 158.8209, "eval_samples_per_second": 35.581, "eval_steps_per_second": 1.782, "step": 6740 }, { "epoch": 20.77, "learning_rate": 2.9228486646884274e-05, "loss": 1.2461, "step": 7000 }, { "epoch": 21.0, "eval_bleu": 1.7505, "eval_gen_len": 16.1793, "eval_loss": 3.6515696048736572, "eval_runtime": 158.234, "eval_samples_per_second": 35.713, "eval_steps_per_second": 1.788, "step": 7077 }, { "epoch": 22.0, "eval_bleu": 1.6842, "eval_gen_len": 16.2152, "eval_loss": 3.6844277381896973, "eval_runtime": 158.2358, "eval_samples_per_second": 35.713, "eval_steps_per_second": 1.788, "step": 7414 }, { "epoch": 22.25, "learning_rate": 2.774480712166172e-05, "loss": 1.1661, "step": 7500 }, { "epoch": 23.0, "eval_bleu": 1.6847, "eval_gen_len": 16.0894, "eval_loss": 3.7410662174224854, "eval_runtime": 158.234, "eval_samples_per_second": 35.713, "eval_steps_per_second": 1.788, "step": 7751 }, { "epoch": 23.74, "learning_rate": 2.6261127596439174e-05, "loss": 1.092, "step": 8000 }, { "epoch": 24.0, "eval_bleu": 1.6662, "eval_gen_len": 16.6121, "eval_loss": 3.7708585262298584, "eval_runtime": 158.583, "eval_samples_per_second": 35.634, "eval_steps_per_second": 1.785, "step": 8088 }, { "epoch": 25.0, "eval_bleu": 1.7985, "eval_gen_len": 16.3143, "eval_loss": 3.8098456859588623, "eval_runtime": 158.302, "eval_samples_per_second": 35.698, "eval_steps_per_second": 1.788, "step": 8425 }, { "epoch": 25.22, "learning_rate": 2.4777448071216618e-05, "loss": 1.0302, "step": 8500 }, { "epoch": 26.0, "eval_bleu": 1.7239, "eval_gen_len": 16.3467, "eval_loss": 3.866185426712036, "eval_runtime": 158.3049, "eval_samples_per_second": 35.697, "eval_steps_per_second": 1.788, "step": 8762 }, { "epoch": 26.7, "learning_rate": 2.3293768545994066e-05, "loss": 0.9626, "step": 9000 }, { "epoch": 27.0, "eval_bleu": 1.7687, "eval_gen_len": 16.8252, "eval_loss": 3.9026572704315186, "eval_runtime": 158.776, "eval_samples_per_second": 35.591, "eval_steps_per_second": 1.782, "step": 9099 }, { "epoch": 28.0, "eval_bleu": 1.6071, "eval_gen_len": 16.8395, "eval_loss": 3.9381048679351807, "eval_runtime": 158.8794, "eval_samples_per_second": 35.568, "eval_steps_per_second": 1.781, "step": 9436 }, { "epoch": 28.19, "learning_rate": 2.1810089020771514e-05, "loss": 0.9076, "step": 9500 }, { "epoch": 29.0, "eval_bleu": 1.7271, "eval_gen_len": 16.5845, "eval_loss": 3.9773497581481934, "eval_runtime": 158.5329, "eval_samples_per_second": 35.646, "eval_steps_per_second": 1.785, "step": 9773 }, { "epoch": 29.67, "learning_rate": 2.0326409495548962e-05, "loss": 0.8523, "step": 10000 }, { "epoch": 30.0, "eval_bleu": 1.6689, "eval_gen_len": 16.5388, "eval_loss": 3.987967014312744, "eval_runtime": 158.4608, "eval_samples_per_second": 35.662, "eval_steps_per_second": 1.786, "step": 10110 }, { "epoch": 31.0, "eval_bleu": 1.6433, "eval_gen_len": 16.6489, "eval_loss": 4.056297302246094, "eval_runtime": 158.7288, "eval_samples_per_second": 35.602, "eval_steps_per_second": 1.783, "step": 10447 }, { "epoch": 31.16, "learning_rate": 1.884272997032641e-05, "loss": 0.8051, "step": 10500 }, { "epoch": 32.0, "eval_bleu": 1.6298, "eval_gen_len": 16.6305, "eval_loss": 4.071838855743408, "eval_runtime": 158.6637, "eval_samples_per_second": 35.616, "eval_steps_per_second": 1.784, "step": 10784 }, { "epoch": 32.64, "learning_rate": 1.7359050445103858e-05, "loss": 0.7621, "step": 11000 }, { "epoch": 33.0, "eval_bleu": 1.5555, "eval_gen_len": 16.7367, "eval_loss": 4.104060649871826, "eval_runtime": 158.7329, "eval_samples_per_second": 35.601, "eval_steps_per_second": 1.783, "step": 11121 }, { "epoch": 34.0, "eval_bleu": 1.5922, "eval_gen_len": 17.4091, "eval_loss": 4.128782272338867, "eval_runtime": 159.2897, "eval_samples_per_second": 35.476, "eval_steps_per_second": 1.777, "step": 11458 }, { "epoch": 34.12, "learning_rate": 1.5875370919881306e-05, "loss": 0.7257, "step": 11500 }, { "epoch": 35.0, "eval_bleu": 1.6243, "eval_gen_len": 16.8432, "eval_loss": 4.1702704429626465, "eval_runtime": 158.8223, "eval_samples_per_second": 35.581, "eval_steps_per_second": 1.782, "step": 11795 }, { "epoch": 35.61, "learning_rate": 1.4391691394658754e-05, "loss": 0.6835, "step": 12000 }, { "epoch": 36.0, "eval_bleu": 1.589, "eval_gen_len": 17.0446, "eval_loss": 4.1908135414123535, "eval_runtime": 159.0745, "eval_samples_per_second": 35.524, "eval_steps_per_second": 1.779, "step": 12132 }, { "epoch": 37.0, "eval_bleu": 1.5549, "eval_gen_len": 17.0649, "eval_loss": 4.204710006713867, "eval_runtime": 159.0216, "eval_samples_per_second": 35.536, "eval_steps_per_second": 1.78, "step": 12469 }, { "epoch": 37.09, "learning_rate": 1.29080118694362e-05, "loss": 0.6586, "step": 12500 }, { "epoch": 38.0, "eval_bleu": 1.5035, "eval_gen_len": 16.8954, "eval_loss": 4.236845970153809, "eval_runtime": 158.8765, "eval_samples_per_second": 35.569, "eval_steps_per_second": 1.781, "step": 12806 }, { "epoch": 38.57, "learning_rate": 1.142433234421365e-05, "loss": 0.6247, "step": 13000 }, { "epoch": 39.0, "eval_bleu": 1.5845, "eval_gen_len": 16.7616, "eval_loss": 4.257309436798096, "eval_runtime": 158.8065, "eval_samples_per_second": 35.584, "eval_steps_per_second": 1.782, "step": 13143 }, { "epoch": 40.0, "eval_bleu": 1.5144, "eval_gen_len": 16.803, "eval_loss": 4.269469738006592, "eval_runtime": 158.8495, "eval_samples_per_second": 35.575, "eval_steps_per_second": 1.782, "step": 13480 }, { "epoch": 40.06, "learning_rate": 9.940652818991098e-06, "loss": 0.604, "step": 13500 }, { "epoch": 41.0, "eval_bleu": 1.5398, "eval_gen_len": 17.1295, "eval_loss": 4.295245170593262, "eval_runtime": 159.0688, "eval_samples_per_second": 35.526, "eval_steps_per_second": 1.779, "step": 13817 }, { "epoch": 41.54, "learning_rate": 8.456973293768548e-06, "loss": 0.5781, "step": 14000 }, { "epoch": 42.0, "eval_bleu": 1.6057, "eval_gen_len": 17.0359, "eval_loss": 4.318183422088623, "eval_runtime": 158.942, "eval_samples_per_second": 35.554, "eval_steps_per_second": 1.781, "step": 14154 }, { "epoch": 43.0, "eval_bleu": 1.5114, "eval_gen_len": 16.6418, "eval_loss": 4.331396102905273, "eval_runtime": 158.745, "eval_samples_per_second": 35.598, "eval_steps_per_second": 1.783, "step": 14491 }, { "epoch": 43.03, "learning_rate": 6.973293768545995e-06, "loss": 0.5636, "step": 14500 }, { "epoch": 44.0, "eval_bleu": 1.5443, "eval_gen_len": 16.745, "eval_loss": 4.341202259063721, "eval_runtime": 158.7765, "eval_samples_per_second": 35.591, "eval_steps_per_second": 1.782, "step": 14828 }, { "epoch": 44.51, "learning_rate": 5.489614243323442e-06, "loss": 0.5429, "step": 15000 }, { "epoch": 45.0, "eval_bleu": 1.5719, "eval_gen_len": 16.8237, "eval_loss": 4.341938495635986, "eval_runtime": 158.7306, "eval_samples_per_second": 35.601, "eval_steps_per_second": 1.783, "step": 15165 }, { "epoch": 45.99, "learning_rate": 4.005934718100891e-06, "loss": 0.5309, "step": 15500 }, { "epoch": 46.0, "eval_bleu": 1.5445, "eval_gen_len": 16.7997, "eval_loss": 4.355504512786865, "eval_runtime": 158.7779, "eval_samples_per_second": 35.591, "eval_steps_per_second": 1.782, "step": 15502 }, { "epoch": 47.0, "eval_bleu": 1.5689, "eval_gen_len": 16.8358, "eval_loss": 4.362900733947754, "eval_runtime": 158.7996, "eval_samples_per_second": 35.586, "eval_steps_per_second": 1.782, "step": 15839 }, { "epoch": 47.48, "learning_rate": 2.5222551928783385e-06, "loss": 0.5206, "step": 16000 }, { "epoch": 48.0, "eval_bleu": 1.5208, "eval_gen_len": 17.0039, "eval_loss": 4.370855808258057, "eval_runtime": 158.9053, "eval_samples_per_second": 35.562, "eval_steps_per_second": 1.781, "step": 16176 }, { "epoch": 48.96, "learning_rate": 1.0385756676557863e-06, "loss": 0.51, "step": 16500 }, { "epoch": 49.0, "eval_bleu": 1.5581, "eval_gen_len": 16.8577, "eval_loss": 4.374205589294434, "eval_runtime": 158.7365, "eval_samples_per_second": 35.6, "eval_steps_per_second": 1.783, "step": 16513 }, { "epoch": 50.0, "eval_bleu": 1.5873, "eval_gen_len": 16.9289, "eval_loss": 4.376501083374023, "eval_runtime": 158.7713, "eval_samples_per_second": 35.592, "eval_steps_per_second": 1.782, "step": 16850 }, { "epoch": 50.0, "step": 16850, "total_flos": 1.8872801391015936e+17, "train_loss": 1.262480049472888, "train_runtime": 34907.6333, "train_samples_per_second": 38.699, "train_steps_per_second": 0.483 } ], "max_steps": 16850, "num_train_epochs": 50, "total_flos": 1.8872801391015936e+17, "trial_name": null, "trial_params": null }