Chillarmo's picture
End of training
f4e97ac verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 100,
"global_step": 1872,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21367521367521367,
"eval_loss": 0.2269495576620102,
"eval_runtime": 301.5344,
"eval_samples_per_second": 19.421,
"eval_steps_per_second": 2.428,
"eval_wer": 0.3076194253481403,
"step": 100
},
{
"epoch": 0.42735042735042733,
"eval_loss": 0.19151096045970917,
"eval_runtime": 302.1327,
"eval_samples_per_second": 19.382,
"eval_steps_per_second": 2.423,
"eval_wer": 0.2824343380927199,
"step": 200
},
{
"epoch": 0.6410256410256411,
"eval_loss": 0.1993144452571869,
"eval_runtime": 302.3659,
"eval_samples_per_second": 19.367,
"eval_steps_per_second": 2.421,
"eval_wer": 0.296448087431694,
"step": 300
},
{
"epoch": 0.8547008547008547,
"eval_loss": 0.18319852650165558,
"eval_runtime": 303.1213,
"eval_samples_per_second": 19.319,
"eval_steps_per_second": 2.415,
"eval_wer": 0.2734884540807333,
"step": 400
},
{
"epoch": 1.0683760683760684,
"grad_norm": 0.8244175314903259,
"learning_rate": 0.0007748306997742663,
"loss": 0.9965,
"step": 500
},
{
"epoch": 1.0683760683760684,
"eval_loss": 0.1763978898525238,
"eval_runtime": 304.6466,
"eval_samples_per_second": 19.222,
"eval_steps_per_second": 2.403,
"eval_wer": 0.26493918561607616,
"step": 500
},
{
"epoch": 1.282051282051282,
"eval_loss": 0.17325642704963684,
"eval_runtime": 304.5973,
"eval_samples_per_second": 19.225,
"eval_steps_per_second": 2.403,
"eval_wer": 0.2625154239379517,
"step": 600
},
{
"epoch": 1.4957264957264957,
"eval_loss": 0.17252753674983978,
"eval_runtime": 305.7799,
"eval_samples_per_second": 19.151,
"eval_steps_per_second": 2.394,
"eval_wer": 0.25918826017979907,
"step": 700
},
{
"epoch": 1.7094017094017095,
"eval_loss": 0.17061175405979156,
"eval_runtime": 306.9826,
"eval_samples_per_second": 19.076,
"eval_steps_per_second": 2.385,
"eval_wer": 0.25806451612903225,
"step": 800
},
{
"epoch": 1.9230769230769231,
"eval_loss": 0.16809020936489105,
"eval_runtime": 306.5557,
"eval_samples_per_second": 19.103,
"eval_steps_per_second": 2.388,
"eval_wer": 0.25854926846465714,
"step": 900
},
{
"epoch": 2.1367521367521367,
"grad_norm": 0.886053204536438,
"learning_rate": 0.0004926636568848759,
"loss": 0.2922,
"step": 1000
},
{
"epoch": 2.1367521367521367,
"eval_loss": 0.16939426958560944,
"eval_runtime": 308.102,
"eval_samples_per_second": 19.007,
"eval_steps_per_second": 2.376,
"eval_wer": 0.2590560549973559,
"step": 1000
},
{
"epoch": 2.3504273504273505,
"eval_loss": 0.17009997367858887,
"eval_runtime": 307.4896,
"eval_samples_per_second": 19.045,
"eval_steps_per_second": 2.381,
"eval_wer": 0.25753569539925963,
"step": 1100
},
{
"epoch": 2.564102564102564,
"eval_loss": 0.17012837529182434,
"eval_runtime": 308.404,
"eval_samples_per_second": 18.988,
"eval_steps_per_second": 2.374,
"eval_wer": 0.2613916798871849,
"step": 1200
},
{
"epoch": 2.7777777777777777,
"eval_loss": 0.16536183655261993,
"eval_runtime": 307.6932,
"eval_samples_per_second": 19.032,
"eval_steps_per_second": 2.379,
"eval_wer": 0.25354750572889123,
"step": 1300
},
{
"epoch": 2.9914529914529915,
"eval_loss": 0.16437660157680511,
"eval_runtime": 307.9587,
"eval_samples_per_second": 19.016,
"eval_steps_per_second": 2.377,
"eval_wer": 0.2516525647805394,
"step": 1400
},
{
"epoch": 3.2051282051282053,
"grad_norm": 0.9470628499984741,
"learning_rate": 0.00021049661399548533,
"loss": 0.2788,
"step": 1500
},
{
"epoch": 3.2051282051282053,
"eval_loss": 0.1636321097612381,
"eval_runtime": 308.3318,
"eval_samples_per_second": 18.993,
"eval_steps_per_second": 2.374,
"eval_wer": 0.2539881896703684,
"step": 1500
},
{
"epoch": 3.4188034188034186,
"eval_loss": 0.16157202422618866,
"eval_runtime": 307.8619,
"eval_samples_per_second": 19.022,
"eval_steps_per_second": 2.378,
"eval_wer": 0.2511678124449145,
"step": 1600
},
{
"epoch": 3.6324786324786325,
"eval_loss": 0.1600475162267685,
"eval_runtime": 308.3358,
"eval_samples_per_second": 18.992,
"eval_steps_per_second": 2.374,
"eval_wer": 0.24702538339502908,
"step": 1700
},
{
"epoch": 3.8461538461538463,
"eval_loss": 0.15865576267242432,
"eval_runtime": 309.7473,
"eval_samples_per_second": 18.906,
"eval_steps_per_second": 2.363,
"eval_wer": 0.2464304600740349,
"step": 1800
},
{
"epoch": 4.0,
"step": 1872,
"total_flos": 2.9809868531482165e+19,
"train_loss": 0.4726643766093458,
"train_runtime": 14997.7554,
"train_samples_per_second": 3.994,
"train_steps_per_second": 0.125
}
],
"logging_steps": 500,
"max_steps": 1872,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.9809868531482165e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}