{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.12621359223301, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 46.9286, "eval_gen_len": 17.6388, "eval_loss": 0.2930362820625305, "eval_meteor": 0.6401, "eval_runtime": 21.9119, "eval_samples_per_second": 24.005, "eval_steps_per_second": 1.232, "step": 206 }, { "epoch": 2.0, "eval_bleu": 55.0709, "eval_gen_len": 17.481, "eval_loss": 0.17119793593883514, "eval_meteor": 0.7436, "eval_runtime": 17.1858, "eval_samples_per_second": 30.607, "eval_steps_per_second": 1.571, "step": 412 }, { "epoch": 2.43, "learning_rate": 0.0002757281553398058, "loss": 1.4069, "step": 500 }, { "epoch": 3.0, "eval_bleu": 56.5722, "eval_gen_len": 17.635, "eval_loss": 0.12680290639400482, "eval_meteor": 0.7412, "eval_runtime": 17.5013, "eval_samples_per_second": 30.055, "eval_steps_per_second": 1.543, "step": 618 }, { "epoch": 4.0, "eval_bleu": 59.7915, "eval_gen_len": 17.5513, "eval_loss": 0.10222692042589188, "eval_meteor": 0.7776, "eval_runtime": 17.1596, "eval_samples_per_second": 30.653, "eval_steps_per_second": 1.573, "step": 824 }, { "epoch": 4.85, "learning_rate": 0.00025145631067961165, "loss": 0.2339, "step": 1000 }, { "epoch": 5.0, "eval_bleu": 60.1489, "eval_gen_len": 17.6255, "eval_loss": 0.09315221011638641, "eval_meteor": 0.7764, "eval_runtime": 17.1827, "eval_samples_per_second": 30.612, "eval_steps_per_second": 1.571, "step": 1030 }, { "epoch": 6.0, "eval_bleu": 60.441, "eval_gen_len": 17.6749, "eval_loss": 0.08468244969844818, "eval_meteor": 0.7696, "eval_runtime": 17.5475, "eval_samples_per_second": 29.976, "eval_steps_per_second": 1.539, "step": 1236 }, { "epoch": 7.0, "eval_bleu": 61.5099, "eval_gen_len": 17.6027, "eval_loss": 0.07199732959270477, "eval_meteor": 0.7875, "eval_runtime": 17.5698, "eval_samples_per_second": 29.938, "eval_steps_per_second": 1.537, "step": 1442 }, { "epoch": 7.28, "learning_rate": 0.00022718446601941746, "loss": 0.1661, "step": 1500 }, { "epoch": 8.0, "eval_bleu": 61.471, "eval_gen_len": 17.6901, "eval_loss": 0.07024037092924118, "eval_meteor": 0.7778, "eval_runtime": 17.3131, "eval_samples_per_second": 30.382, "eval_steps_per_second": 1.56, "step": 1648 }, { "epoch": 9.0, "eval_bleu": 62.8426, "eval_gen_len": 17.6274, "eval_loss": 0.05566093325614929, "eval_meteor": 0.7954, "eval_runtime": 18.4096, "eval_samples_per_second": 28.572, "eval_steps_per_second": 1.467, "step": 1854 }, { "epoch": 9.71, "learning_rate": 0.00020291262135922327, "loss": 0.1289, "step": 2000 }, { "epoch": 10.0, "eval_bleu": 62.5415, "eval_gen_len": 17.6825, "eval_loss": 0.05298692733049393, "eval_meteor": 0.7883, "eval_runtime": 17.3063, "eval_samples_per_second": 30.394, "eval_steps_per_second": 1.56, "step": 2060 }, { "epoch": 11.0, "eval_bleu": 62.8065, "eval_gen_len": 17.711, "eval_loss": 0.049979548901319504, "eval_meteor": 0.7881, "eval_runtime": 17.359, "eval_samples_per_second": 30.301, "eval_steps_per_second": 1.555, "step": 2266 }, { "epoch": 12.0, "eval_bleu": 62.6083, "eval_gen_len": 17.7167, "eval_loss": 0.04917723312973976, "eval_meteor": 0.7826, "eval_runtime": 17.3203, "eval_samples_per_second": 30.369, "eval_steps_per_second": 1.559, "step": 2472 }, { "epoch": 12.14, "learning_rate": 0.00017864077669902913, "loss": 0.1034, "step": 2500 }, { "epoch": 13.0, "eval_bleu": 63.5012, "eval_gen_len": 17.6844, "eval_loss": 0.042322106659412384, "eval_meteor": 0.7943, "eval_runtime": 17.3469, "eval_samples_per_second": 30.322, "eval_steps_per_second": 1.556, "step": 2678 }, { "epoch": 14.0, "eval_bleu": 62.9858, "eval_gen_len": 17.7091, "eval_loss": 0.04112717881798744, "eval_meteor": 0.7876, "eval_runtime": 17.2586, "eval_samples_per_second": 30.478, "eval_steps_per_second": 1.564, "step": 2884 }, { "epoch": 14.56, "learning_rate": 0.00015436893203883494, "loss": 0.0862, "step": 3000 }, { "epoch": 15.0, "eval_bleu": 64.4301, "eval_gen_len": 17.6806, "eval_loss": 0.03431503847241402, "eval_meteor": 0.8036, "eval_runtime": 17.1867, "eval_samples_per_second": 30.605, "eval_steps_per_second": 1.571, "step": 3090 }, { "epoch": 16.0, "eval_bleu": 65.3347, "eval_gen_len": 17.654, "eval_loss": 0.031859882175922394, "eval_meteor": 0.8153, "eval_runtime": 17.2343, "eval_samples_per_second": 30.52, "eval_steps_per_second": 1.567, "step": 3296 }, { "epoch": 16.99, "learning_rate": 0.00013009708737864078, "loss": 0.073, "step": 3500 }, { "epoch": 17.0, "eval_bleu": 62.9474, "eval_gen_len": 17.7433, "eval_loss": 0.03299812600016594, "eval_meteor": 0.7817, "eval_runtime": 17.342, "eval_samples_per_second": 30.331, "eval_steps_per_second": 1.557, "step": 3502 }, { "epoch": 18.0, "eval_bleu": 66.3794, "eval_gen_len": 17.6217, "eval_loss": 0.02293582074344158, "eval_meteor": 0.8285, "eval_runtime": 17.2657, "eval_samples_per_second": 30.465, "eval_steps_per_second": 1.564, "step": 3708 }, { "epoch": 19.0, "eval_bleu": 64.1881, "eval_gen_len": 17.711, "eval_loss": 0.02707734704017639, "eval_meteor": 0.7974, "eval_runtime": 17.8576, "eval_samples_per_second": 29.455, "eval_steps_per_second": 1.512, "step": 3914 }, { "epoch": 19.42, "learning_rate": 0.00010582524271844659, "loss": 0.0609, "step": 4000 }, { "epoch": 20.0, "eval_bleu": 65.4997, "eval_gen_len": 17.6616, "eval_loss": 0.022358747199177742, "eval_meteor": 0.8156, "eval_runtime": 17.5005, "eval_samples_per_second": 30.056, "eval_steps_per_second": 1.543, "step": 4120 }, { "epoch": 21.0, "eval_bleu": 64.8991, "eval_gen_len": 17.6863, "eval_loss": 0.022536493837833405, "eval_meteor": 0.8061, "eval_runtime": 17.556, "eval_samples_per_second": 29.961, "eval_steps_per_second": 1.538, "step": 4326 }, { "epoch": 21.84, "learning_rate": 8.155339805825241e-05, "loss": 0.0538, "step": 4500 }, { "epoch": 22.0, "eval_bleu": 66.3962, "eval_gen_len": 17.6407, "eval_loss": 0.018802011385560036, "eval_meteor": 0.8251, "eval_runtime": 17.5884, "eval_samples_per_second": 29.906, "eval_steps_per_second": 1.535, "step": 4532 }, { "epoch": 23.0, "eval_bleu": 65.0479, "eval_gen_len": 17.6863, "eval_loss": 0.02161051519215107, "eval_meteor": 0.8069, "eval_runtime": 17.501, "eval_samples_per_second": 30.055, "eval_steps_per_second": 1.543, "step": 4738 }, { "epoch": 24.0, "eval_bleu": 66.0078, "eval_gen_len": 17.6559, "eval_loss": 0.01826297491788864, "eval_meteor": 0.8205, "eval_runtime": 17.773, "eval_samples_per_second": 29.595, "eval_steps_per_second": 1.519, "step": 4944 }, { "epoch": 24.27, "learning_rate": 5.728155339805825e-05, "loss": 0.0476, "step": 5000 }, { "epoch": 25.0, "eval_bleu": 65.1378, "eval_gen_len": 17.6863, "eval_loss": 0.021101167425513268, "eval_meteor": 0.8087, "eval_runtime": 17.5024, "eval_samples_per_second": 30.053, "eval_steps_per_second": 1.543, "step": 5150 }, { "epoch": 26.0, "eval_bleu": 65.5523, "eval_gen_len": 17.6749, "eval_loss": 0.018090983852744102, "eval_meteor": 0.8134, "eval_runtime": 17.7903, "eval_samples_per_second": 29.567, "eval_steps_per_second": 1.518, "step": 5356 }, { "epoch": 26.7, "learning_rate": 3.3009708737864073e-05, "loss": 0.044, "step": 5500 }, { "epoch": 27.0, "eval_bleu": 66.1951, "eval_gen_len": 17.6597, "eval_loss": 0.016722695901989937, "eval_meteor": 0.8212, "eval_runtime": 17.6724, "eval_samples_per_second": 29.764, "eval_steps_per_second": 1.528, "step": 5562 }, { "epoch": 28.0, "eval_bleu": 66.2812, "eval_gen_len": 17.6559, "eval_loss": 0.0156955998390913, "eval_meteor": 0.8226, "eval_runtime": 17.5211, "eval_samples_per_second": 30.021, "eval_steps_per_second": 1.541, "step": 5768 }, { "epoch": 29.0, "eval_bleu": 65.4079, "eval_gen_len": 17.6863, "eval_loss": 0.017170317471027374, "eval_meteor": 0.8103, "eval_runtime": 17.494, "eval_samples_per_second": 30.067, "eval_steps_per_second": 1.543, "step": 5974 }, { "epoch": 29.13, "learning_rate": 8.737864077669902e-06, "loss": 0.04, "step": 6000 } ], "logging_steps": 500, "max_steps": 6180, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.55075881506816e+16, "train_batch_size": 20, "trial_name": null, "trial_params": null }