{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 1872,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21367521367521367,
      "eval_loss": 0.2269495576620102,
      "eval_runtime": 301.5344,
      "eval_samples_per_second": 19.421,
      "eval_steps_per_second": 2.428,
      "eval_wer": 0.3076194253481403,
      "step": 100
    },
    {
      "epoch": 0.42735042735042733,
      "eval_loss": 0.19151096045970917,
      "eval_runtime": 302.1327,
      "eval_samples_per_second": 19.382,
      "eval_steps_per_second": 2.423,
      "eval_wer": 0.2824343380927199,
      "step": 200
    },
    {
      "epoch": 0.6410256410256411,
      "eval_loss": 0.1993144452571869,
      "eval_runtime": 302.3659,
      "eval_samples_per_second": 19.367,
      "eval_steps_per_second": 2.421,
      "eval_wer": 0.296448087431694,
      "step": 300
    },
    {
      "epoch": 0.8547008547008547,
      "eval_loss": 0.18319852650165558,
      "eval_runtime": 303.1213,
      "eval_samples_per_second": 19.319,
      "eval_steps_per_second": 2.415,
      "eval_wer": 0.2734884540807333,
      "step": 400
    },
    {
      "epoch": 1.0683760683760684,
      "grad_norm": 0.8244175314903259,
      "learning_rate": 0.0007748306997742663,
      "loss": 0.9965,
      "step": 500
    },
    {
      "epoch": 1.0683760683760684,
      "eval_loss": 0.1763978898525238,
      "eval_runtime": 304.6466,
      "eval_samples_per_second": 19.222,
      "eval_steps_per_second": 2.403,
      "eval_wer": 0.26493918561607616,
      "step": 500
    },
    {
      "epoch": 1.282051282051282,
      "eval_loss": 0.17325642704963684,
      "eval_runtime": 304.5973,
      "eval_samples_per_second": 19.225,
      "eval_steps_per_second": 2.403,
      "eval_wer": 0.2625154239379517,
      "step": 600
    },
    {
      "epoch": 1.4957264957264957,
      "eval_loss": 0.17252753674983978,
      "eval_runtime": 305.7799,
      "eval_samples_per_second": 19.151,
      "eval_steps_per_second": 2.394,
      "eval_wer": 0.25918826017979907,
      "step": 700
    },
    {
      "epoch": 1.7094017094017095,
      "eval_loss": 0.17061175405979156,
      "eval_runtime": 306.9826,
      "eval_samples_per_second": 19.076,
      "eval_steps_per_second": 2.385,
      "eval_wer": 0.25806451612903225,
      "step": 800
    },
    {
      "epoch": 1.9230769230769231,
      "eval_loss": 0.16809020936489105,
      "eval_runtime": 306.5557,
      "eval_samples_per_second": 19.103,
      "eval_steps_per_second": 2.388,
      "eval_wer": 0.25854926846465714,
      "step": 900
    },
    {
      "epoch": 2.1367521367521367,
      "grad_norm": 0.886053204536438,
      "learning_rate": 0.0004926636568848759,
      "loss": 0.2922,
      "step": 1000
    },
    {
      "epoch": 2.1367521367521367,
      "eval_loss": 0.16939426958560944,
      "eval_runtime": 308.102,
      "eval_samples_per_second": 19.007,
      "eval_steps_per_second": 2.376,
      "eval_wer": 0.2590560549973559,
      "step": 1000
    },
    {
      "epoch": 2.3504273504273505,
      "eval_loss": 0.17009997367858887,
      "eval_runtime": 307.4896,
      "eval_samples_per_second": 19.045,
      "eval_steps_per_second": 2.381,
      "eval_wer": 0.25753569539925963,
      "step": 1100
    },
    {
      "epoch": 2.564102564102564,
      "eval_loss": 0.17012837529182434,
      "eval_runtime": 308.404,
      "eval_samples_per_second": 18.988,
      "eval_steps_per_second": 2.374,
      "eval_wer": 0.2613916798871849,
      "step": 1200
    },
    {
      "epoch": 2.7777777777777777,
      "eval_loss": 0.16536183655261993,
      "eval_runtime": 307.6932,
      "eval_samples_per_second": 19.032,
      "eval_steps_per_second": 2.379,
      "eval_wer": 0.25354750572889123,
      "step": 1300
    },
    {
      "epoch": 2.9914529914529915,
      "eval_loss": 0.16437660157680511,
      "eval_runtime": 307.9587,
      "eval_samples_per_second": 19.016,
      "eval_steps_per_second": 2.377,
      "eval_wer": 0.2516525647805394,
      "step": 1400
    },
    {
      "epoch": 3.2051282051282053,
      "grad_norm": 0.9470628499984741,
      "learning_rate": 0.00021049661399548533,
      "loss": 0.2788,
      "step": 1500
    },
    {
      "epoch": 3.2051282051282053,
      "eval_loss": 0.1636321097612381,
      "eval_runtime": 308.3318,
      "eval_samples_per_second": 18.993,
      "eval_steps_per_second": 2.374,
      "eval_wer": 0.2539881896703684,
      "step": 1500
    },
    {
      "epoch": 3.4188034188034186,
      "eval_loss": 0.16157202422618866,
      "eval_runtime": 307.8619,
      "eval_samples_per_second": 19.022,
      "eval_steps_per_second": 2.378,
      "eval_wer": 0.2511678124449145,
      "step": 1600
    },
    {
      "epoch": 3.6324786324786325,
      "eval_loss": 0.1600475162267685,
      "eval_runtime": 308.3358,
      "eval_samples_per_second": 18.992,
      "eval_steps_per_second": 2.374,
      "eval_wer": 0.24702538339502908,
      "step": 1700
    },
    {
      "epoch": 3.8461538461538463,
      "eval_loss": 0.15865576267242432,
      "eval_runtime": 309.7473,
      "eval_samples_per_second": 18.906,
      "eval_steps_per_second": 2.363,
      "eval_wer": 0.2464304600740349,
      "step": 1800
    },
    {
      "epoch": 4.0,
      "step": 1872,
      "total_flos": 2.9809868531482165e+19,
      "train_loss": 0.4726643766093458,
      "train_runtime": 14997.7554,
      "train_samples_per_second": 3.994,
      "train_steps_per_second": 0.125
    }
  ],
  "logging_steps": 500,
  "max_steps": 1872,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.9809868531482165e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}