{ "best_metric": 42.7722, "best_model_checkpoint": "opus_base_adapt_wce_gloss_unsampled_precision_3_ubweight_1.25/checkpoint-80000", "epoch": 3.943217665615142, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.2076, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 35.6766, "eval_gen_len": 39.8754, "eval_loss": 0.10901036113500595, "eval_runtime": 190.0619, "eval_samples_per_second": 5.488, "eval_steps_per_second": 0.174, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.1743, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 37.712, "eval_gen_len": 37.931, "eval_loss": 0.10654148459434509, "eval_runtime": 187.1025, "eval_samples_per_second": 5.574, "eval_steps_per_second": 0.176, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1672, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.1339, "eval_gen_len": 34.9118, "eval_loss": 0.10531440377235413, "eval_runtime": 148.6317, "eval_samples_per_second": 7.017, "eval_steps_per_second": 0.222, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1618, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.355, "eval_gen_len": 35.2416, "eval_loss": 0.10421621054410934, "eval_runtime": 124.1237, "eval_samples_per_second": 8.403, "eval_steps_per_second": 0.266, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1594, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.6651, "eval_gen_len": 35.4228, "eval_loss": 0.10366757214069366, "eval_runtime": 159.7806, "eval_samples_per_second": 6.528, "eval_steps_per_second": 0.207, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1477, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.9522, "eval_gen_len": 35.0451, "eval_loss": 0.10360125452280045, "eval_runtime": 138.0407, "eval_samples_per_second": 7.556, "eval_steps_per_second": 0.239, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.827545839905363e-05, "loss": 0.1475, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 40.2121, "eval_gen_len": 36.4669, "eval_loss": 0.10278471559286118, "eval_runtime": 128.7397, "eval_samples_per_second": 8.102, "eval_steps_per_second": 0.256, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.147, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 41.9375, "eval_gen_len": 35.2301, "eval_loss": 0.10222012549638748, "eval_runtime": 132.4166, "eval_samples_per_second": 7.877, "eval_steps_per_second": 0.249, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.7782802641955836e-05, "loss": 0.1456, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 40.9742, "eval_gen_len": 37.1965, "eval_loss": 0.10183104127645493, "eval_runtime": 119.5559, "eval_samples_per_second": 8.724, "eval_steps_per_second": 0.276, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536474763406942e-05, "loss": 0.1448, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 41.7528, "eval_gen_len": 35.7248, "eval_loss": 0.10195966809988022, "eval_runtime": 123.0307, "eval_samples_per_second": 8.478, "eval_steps_per_second": 0.268, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.7290146884858045e-05, "loss": 0.1372, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 42.1909, "eval_gen_len": 35.2924, "eval_loss": 0.1022593304514885, "eval_runtime": 159.0132, "eval_samples_per_second": 6.559, "eval_steps_per_second": 0.208, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.7043757393533123e-05, "loss": 0.1363, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 42.6162, "eval_gen_len": 34.9866, "eval_loss": 0.10216603428125381, "eval_runtime": 116.2982, "eval_samples_per_second": 8.968, "eval_steps_per_second": 0.284, "step": 48000 }, { "epoch": 2.56, "learning_rate": 1.679742951498423e-05, "loss": 0.1352, "step": 52000 }, { "epoch": 2.56, "eval_bleu": 42.4525, "eval_gen_len": 34.8888, "eval_loss": 0.10181364417076111, "eval_runtime": 134.9256, "eval_samples_per_second": 7.73, "eval_steps_per_second": 0.245, "step": 52000 }, { "epoch": 2.76, "learning_rate": 1.6551101636435333e-05, "loss": 0.1355, "step": 56000 }, { "epoch": 2.76, "eval_bleu": 41.9729, "eval_gen_len": 35.9051, "eval_loss": 0.10166899114847183, "eval_runtime": 135.9018, "eval_samples_per_second": 7.675, "eval_steps_per_second": 0.243, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.6304773757886436e-05, "loss": 0.1358, "step": 60000 }, { "epoch": 2.96, "eval_bleu": 42.3275, "eval_gen_len": 34.8514, "eval_loss": 0.10106752812862396, "eval_runtime": 160.3679, "eval_samples_per_second": 6.504, "eval_steps_per_second": 0.206, "step": 60000 }, { "epoch": 3.15, "learning_rate": 1.6058445879337542e-05, "loss": 0.1294, "step": 64000 }, { "epoch": 3.15, "eval_bleu": 42.2988, "eval_gen_len": 34.8188, "eval_loss": 0.10183101147413254, "eval_runtime": 137.4561, "eval_samples_per_second": 7.588, "eval_steps_per_second": 0.24, "step": 64000 }, { "epoch": 3.35, "learning_rate": 1.5812118000788645e-05, "loss": 0.1289, "step": 68000 }, { "epoch": 3.35, "eval_bleu": 42.7527, "eval_gen_len": 34.7766, "eval_loss": 0.10201847553253174, "eval_runtime": 136.3856, "eval_samples_per_second": 7.647, "eval_steps_per_second": 0.242, "step": 68000 }, { "epoch": 3.55, "learning_rate": 1.5565728509463723e-05, "loss": 0.1277, "step": 72000 }, { "epoch": 3.55, "eval_bleu": 42.3528, "eval_gen_len": 35.2416, "eval_loss": 0.10199479013681412, "eval_runtime": 110.1197, "eval_samples_per_second": 9.472, "eval_steps_per_second": 0.3, "step": 72000 }, { "epoch": 3.75, "learning_rate": 1.5319400630914826e-05, "loss": 0.1282, "step": 76000 }, { "epoch": 3.75, "eval_bleu": 42.4538, "eval_gen_len": 35.4746, "eval_loss": 0.10173474997282028, "eval_runtime": 126.659, "eval_samples_per_second": 8.235, "eval_steps_per_second": 0.261, "step": 76000 }, { "epoch": 3.94, "learning_rate": 1.5073072752365931e-05, "loss": 0.129, "step": 80000 }, { "epoch": 3.94, "eval_bleu": 42.7722, "eval_gen_len": 34.8581, "eval_loss": 0.10149160027503967, "eval_runtime": 151.6781, "eval_samples_per_second": 6.876, "eval_steps_per_second": 0.218, "step": 80000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 1.3796012702564352e+17, "trial_name": null, "trial_params": null }