|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.12621359223301, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 46.9286, |
|
"eval_gen_len": 17.6388, |
|
"eval_loss": 0.2930362820625305, |
|
"eval_meteor": 0.6401, |
|
"eval_runtime": 21.9119, |
|
"eval_samples_per_second": 24.005, |
|
"eval_steps_per_second": 1.232, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 55.0709, |
|
"eval_gen_len": 17.481, |
|
"eval_loss": 0.17119793593883514, |
|
"eval_meteor": 0.7436, |
|
"eval_runtime": 17.1858, |
|
"eval_samples_per_second": 30.607, |
|
"eval_steps_per_second": 1.571, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0002757281553398058, |
|
"loss": 1.4069, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 56.5722, |
|
"eval_gen_len": 17.635, |
|
"eval_loss": 0.12680290639400482, |
|
"eval_meteor": 0.7412, |
|
"eval_runtime": 17.5013, |
|
"eval_samples_per_second": 30.055, |
|
"eval_steps_per_second": 1.543, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 59.7915, |
|
"eval_gen_len": 17.5513, |
|
"eval_loss": 0.10222692042589188, |
|
"eval_meteor": 0.7776, |
|
"eval_runtime": 17.1596, |
|
"eval_samples_per_second": 30.653, |
|
"eval_steps_per_second": 1.573, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00025145631067961165, |
|
"loss": 0.2339, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 60.1489, |
|
"eval_gen_len": 17.6255, |
|
"eval_loss": 0.09315221011638641, |
|
"eval_meteor": 0.7764, |
|
"eval_runtime": 17.1827, |
|
"eval_samples_per_second": 30.612, |
|
"eval_steps_per_second": 1.571, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 60.441, |
|
"eval_gen_len": 17.6749, |
|
"eval_loss": 0.08468244969844818, |
|
"eval_meteor": 0.7696, |
|
"eval_runtime": 17.5475, |
|
"eval_samples_per_second": 29.976, |
|
"eval_steps_per_second": 1.539, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 61.5099, |
|
"eval_gen_len": 17.6027, |
|
"eval_loss": 0.07199732959270477, |
|
"eval_meteor": 0.7875, |
|
"eval_runtime": 17.5698, |
|
"eval_samples_per_second": 29.938, |
|
"eval_steps_per_second": 1.537, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.00022718446601941746, |
|
"loss": 0.1661, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 61.471, |
|
"eval_gen_len": 17.6901, |
|
"eval_loss": 0.07024037092924118, |
|
"eval_meteor": 0.7778, |
|
"eval_runtime": 17.3131, |
|
"eval_samples_per_second": 30.382, |
|
"eval_steps_per_second": 1.56, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 62.8426, |
|
"eval_gen_len": 17.6274, |
|
"eval_loss": 0.05566093325614929, |
|
"eval_meteor": 0.7954, |
|
"eval_runtime": 18.4096, |
|
"eval_samples_per_second": 28.572, |
|
"eval_steps_per_second": 1.467, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.00020291262135922327, |
|
"loss": 0.1289, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 62.5415, |
|
"eval_gen_len": 17.6825, |
|
"eval_loss": 0.05298692733049393, |
|
"eval_meteor": 0.7883, |
|
"eval_runtime": 17.3063, |
|
"eval_samples_per_second": 30.394, |
|
"eval_steps_per_second": 1.56, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 62.8065, |
|
"eval_gen_len": 17.711, |
|
"eval_loss": 0.049979548901319504, |
|
"eval_meteor": 0.7881, |
|
"eval_runtime": 17.359, |
|
"eval_samples_per_second": 30.301, |
|
"eval_steps_per_second": 1.555, |
|
"step": 2266 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 62.6083, |
|
"eval_gen_len": 17.7167, |
|
"eval_loss": 0.04917723312973976, |
|
"eval_meteor": 0.7826, |
|
"eval_runtime": 17.3203, |
|
"eval_samples_per_second": 30.369, |
|
"eval_steps_per_second": 1.559, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.00017864077669902913, |
|
"loss": 0.1034, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 63.5012, |
|
"eval_gen_len": 17.6844, |
|
"eval_loss": 0.042322106659412384, |
|
"eval_meteor": 0.7943, |
|
"eval_runtime": 17.3469, |
|
"eval_samples_per_second": 30.322, |
|
"eval_steps_per_second": 1.556, |
|
"step": 2678 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 62.9858, |
|
"eval_gen_len": 17.7091, |
|
"eval_loss": 0.04112717881798744, |
|
"eval_meteor": 0.7876, |
|
"eval_runtime": 17.2586, |
|
"eval_samples_per_second": 30.478, |
|
"eval_steps_per_second": 1.564, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 0.00015436893203883494, |
|
"loss": 0.0862, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 64.4301, |
|
"eval_gen_len": 17.6806, |
|
"eval_loss": 0.03431503847241402, |
|
"eval_meteor": 0.8036, |
|
"eval_runtime": 17.1867, |
|
"eval_samples_per_second": 30.605, |
|
"eval_steps_per_second": 1.571, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 65.3347, |
|
"eval_gen_len": 17.654, |
|
"eval_loss": 0.031859882175922394, |
|
"eval_meteor": 0.8153, |
|
"eval_runtime": 17.2343, |
|
"eval_samples_per_second": 30.52, |
|
"eval_steps_per_second": 1.567, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 0.00013009708737864078, |
|
"loss": 0.073, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 62.9474, |
|
"eval_gen_len": 17.7433, |
|
"eval_loss": 0.03299812600016594, |
|
"eval_meteor": 0.7817, |
|
"eval_runtime": 17.342, |
|
"eval_samples_per_second": 30.331, |
|
"eval_steps_per_second": 1.557, |
|
"step": 3502 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 66.3794, |
|
"eval_gen_len": 17.6217, |
|
"eval_loss": 0.02293582074344158, |
|
"eval_meteor": 0.8285, |
|
"eval_runtime": 17.2657, |
|
"eval_samples_per_second": 30.465, |
|
"eval_steps_per_second": 1.564, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 64.1881, |
|
"eval_gen_len": 17.711, |
|
"eval_loss": 0.02707734704017639, |
|
"eval_meteor": 0.7974, |
|
"eval_runtime": 17.8576, |
|
"eval_samples_per_second": 29.455, |
|
"eval_steps_per_second": 1.512, |
|
"step": 3914 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 0.00010582524271844659, |
|
"loss": 0.0609, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 65.4997, |
|
"eval_gen_len": 17.6616, |
|
"eval_loss": 0.022358747199177742, |
|
"eval_meteor": 0.8156, |
|
"eval_runtime": 17.5005, |
|
"eval_samples_per_second": 30.056, |
|
"eval_steps_per_second": 1.543, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bleu": 64.8991, |
|
"eval_gen_len": 17.6863, |
|
"eval_loss": 0.022536493837833405, |
|
"eval_meteor": 0.8061, |
|
"eval_runtime": 17.556, |
|
"eval_samples_per_second": 29.961, |
|
"eval_steps_per_second": 1.538, |
|
"step": 4326 |
|
}, |
|
{ |
|
"epoch": 21.84, |
|
"learning_rate": 8.155339805825241e-05, |
|
"loss": 0.0538, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bleu": 66.3962, |
|
"eval_gen_len": 17.6407, |
|
"eval_loss": 0.018802011385560036, |
|
"eval_meteor": 0.8251, |
|
"eval_runtime": 17.5884, |
|
"eval_samples_per_second": 29.906, |
|
"eval_steps_per_second": 1.535, |
|
"step": 4532 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 65.0479, |
|
"eval_gen_len": 17.6863, |
|
"eval_loss": 0.02161051519215107, |
|
"eval_meteor": 0.8069, |
|
"eval_runtime": 17.501, |
|
"eval_samples_per_second": 30.055, |
|
"eval_steps_per_second": 1.543, |
|
"step": 4738 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bleu": 66.0078, |
|
"eval_gen_len": 17.6559, |
|
"eval_loss": 0.01826297491788864, |
|
"eval_meteor": 0.8205, |
|
"eval_runtime": 17.773, |
|
"eval_samples_per_second": 29.595, |
|
"eval_steps_per_second": 1.519, |
|
"step": 4944 |
|
}, |
|
{ |
|
"epoch": 24.27, |
|
"learning_rate": 5.728155339805825e-05, |
|
"loss": 0.0476, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bleu": 65.1378, |
|
"eval_gen_len": 17.6863, |
|
"eval_loss": 0.021101167425513268, |
|
"eval_meteor": 0.8087, |
|
"eval_runtime": 17.5024, |
|
"eval_samples_per_second": 30.053, |
|
"eval_steps_per_second": 1.543, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bleu": 65.5523, |
|
"eval_gen_len": 17.6749, |
|
"eval_loss": 0.018090983852744102, |
|
"eval_meteor": 0.8134, |
|
"eval_runtime": 17.7903, |
|
"eval_samples_per_second": 29.567, |
|
"eval_steps_per_second": 1.518, |
|
"step": 5356 |
|
}, |
|
{ |
|
"epoch": 26.7, |
|
"learning_rate": 3.3009708737864073e-05, |
|
"loss": 0.044, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bleu": 66.1951, |
|
"eval_gen_len": 17.6597, |
|
"eval_loss": 0.016722695901989937, |
|
"eval_meteor": 0.8212, |
|
"eval_runtime": 17.6724, |
|
"eval_samples_per_second": 29.764, |
|
"eval_steps_per_second": 1.528, |
|
"step": 5562 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bleu": 66.2812, |
|
"eval_gen_len": 17.6559, |
|
"eval_loss": 0.0156955998390913, |
|
"eval_meteor": 0.8226, |
|
"eval_runtime": 17.5211, |
|
"eval_samples_per_second": 30.021, |
|
"eval_steps_per_second": 1.541, |
|
"step": 5768 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bleu": 65.4079, |
|
"eval_gen_len": 17.6863, |
|
"eval_loss": 0.017170317471027374, |
|
"eval_meteor": 0.8103, |
|
"eval_runtime": 17.494, |
|
"eval_samples_per_second": 30.067, |
|
"eval_steps_per_second": 1.543, |
|
"step": 5974 |
|
}, |
|
{ |
|
"epoch": 29.13, |
|
"learning_rate": 8.737864077669902e-06, |
|
"loss": 0.04, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 1.55075881506816e+16, |
|
"train_batch_size": 20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|