End of training

Browse files

Files changed (5) hide show

README.md +6 -4
all_results.json +16 -0
eval_results.json +10 -0
train_results.json +9 -0
trainer_state.json +2002 -0

README.md CHANGED Viewed

@@ -3,6 +3,8 @@ library_name: transformers
 license: apache-2.0
 base_model: facebook/wav2vec2-xls-r-300m
 tags:
 - generated_from_trainer
 metrics:
 - wer
@@ -16,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 # Wav2vec2-fula-no0
-This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4417
-- Wer: 0.5500
-- Cer: 0.1528
 ## Model description

 license: apache-2.0
 base_model: facebook/wav2vec2-xls-r-300m
 tags:
+- automatic-speech-recognition
+- Leonel-Maia/fulfulde-no0
 - generated_from_trainer
 metrics:
 - wer
 # Wav2vec2-fula-no0
+This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the LEONEL-MAIA/FULFULDE-NO0 - DEFAULT dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4407
+- Wer: 0.5583
+- Cer: 0.1549
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 2.5601733307071104,
+    "eval_cer": 0.15485007139457402,
+    "eval_loss": 0.4407041072845459,
+    "eval_runtime": 161.3047,
+    "eval_samples": 1225,
+    "eval_samples_per_second": 7.594,
+    "eval_steps_per_second": 0.955,
+    "eval_wer": 0.5582874099194574,
+    "total_flos": 2.4200639627895153e+19,
+    "train_loss": 0.5404087718450106,
+    "train_runtime": 33172.3648,
+    "train_samples": 40609,
+    "train_samples_per_second": 73.451,
+    "train_steps_per_second": 4.591
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 2.5601733307071104,
+    "eval_cer": 0.15485007139457402,
+    "eval_loss": 0.4407041072845459,
+    "eval_runtime": 161.3047,
+    "eval_samples": 1225,
+    "eval_samples_per_second": 7.594,
+    "eval_steps_per_second": 0.955,
+    "eval_wer": 0.5582874099194574
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.5601733307071104,
+    "total_flos": 2.4200639627895153e+19,
+    "train_loss": 0.5404087718450106,
+    "train_runtime": 33172.3648,
+    "train_samples": 40609,
+    "train_samples_per_second": 73.451,
+    "train_steps_per_second": 4.591
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,2002 @@

+{
+  "best_global_step": 5000,
+  "best_metric": 0.4407041072845459,
+  "best_model_checkpoint": "./Wav2vec2-fula-no0/checkpoint-5000",
+  "epoch": 2.5601733307071104,
+  "eval_steps": 500,
+  "global_step": 6500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.009848335631278314,
+      "grad_norm": 12.046097755432129,
+      "learning_rate": 6.899999999999999e-06,
+      "loss": 14.4755,
+      "step": 25
+    },
+    {
+      "epoch": 0.019696671262556628,
+      "grad_norm": 14.947002410888672,
+      "learning_rate": 1.44e-05,
+      "loss": 12.3724,
+      "step": 50
+    },
+    {
+      "epoch": 0.029545006893834942,
+      "grad_norm": 12.23189640045166,
+      "learning_rate": 2.1899999999999997e-05,
+      "loss": 9.1857,
+      "step": 75
+    },
+    {
+      "epoch": 0.039393342525113256,
+      "grad_norm": 9.180994987487793,
+      "learning_rate": 2.94e-05,
+      "loss": 5.4281,
+      "step": 100
+    },
+    {
+      "epoch": 0.04924167815639157,
+      "grad_norm": 6.606555461883545,
+      "learning_rate": 3.6899999999999996e-05,
+      "loss": 4.5971,
+      "step": 125
+    },
+    {
+      "epoch": 0.059090013787669884,
+      "grad_norm": 5.29020881652832,
+      "learning_rate": 4.4399999999999995e-05,
+      "loss": 3.9427,
+      "step": 150
+    },
+    {
+      "epoch": 0.06893834941894819,
+      "grad_norm": 2.6828274726867676,
+      "learning_rate": 5.1899999999999994e-05,
+      "loss": 3.6124,
+      "step": 175
+    },
+    {
+      "epoch": 0.07878668505022651,
+      "grad_norm": 1.4759694337844849,
+      "learning_rate": 5.94e-05,
+      "loss": 3.2772,
+      "step": 200
+    },
+    {
+      "epoch": 0.08863502068150482,
+      "grad_norm": 0.8410621285438538,
+      "learning_rate": 6.69e-05,
+      "loss": 3.0949,
+      "step": 225
+    },
+    {
+      "epoch": 0.09848335631278314,
+      "grad_norm": 1.349051594734192,
+      "learning_rate": 7.439999999999999e-05,
+      "loss": 2.9748,
+      "step": 250
+    },
+    {
+      "epoch": 0.10833169194406145,
+      "grad_norm": 0.9243702292442322,
+      "learning_rate": 8.19e-05,
+      "loss": 2.9459,
+      "step": 275
+    },
+    {
+      "epoch": 0.11818002757533977,
+      "grad_norm": 0.5389373302459717,
+      "learning_rate": 8.939999999999999e-05,
+      "loss": 2.9207,
+      "step": 300
+    },
+    {
+      "epoch": 0.1280283632066181,
+      "grad_norm": 1.399165153503418,
+      "learning_rate": 9.69e-05,
+      "loss": 2.8467,
+      "step": 325
+    },
+    {
+      "epoch": 0.13787669883789638,
+      "grad_norm": 0.7010773420333862,
+      "learning_rate": 0.00010439999999999999,
+      "loss": 2.5841,
+      "step": 350
+    },
+    {
+      "epoch": 0.1477250344691747,
+      "grad_norm": 0.8464407920837402,
+      "learning_rate": 0.0001119,
+      "loss": 2.1433,
+      "step": 375
+    },
+    {
+      "epoch": 0.15757337010045303,
+      "grad_norm": 1.085396647453308,
+      "learning_rate": 0.0001194,
+      "loss": 1.4039,
+      "step": 400
+    },
+    {
+      "epoch": 0.16742170573173135,
+      "grad_norm": 0.8390390276908875,
+      "learning_rate": 0.0001269,
+      "loss": 0.9324,
+      "step": 425
+    },
+    {
+      "epoch": 0.17727004136300964,
+      "grad_norm": 1.0844223499298096,
+      "learning_rate": 0.0001344,
+      "loss": 0.7308,
+      "step": 450
+    },
+    {
+      "epoch": 0.18711837699428796,
+      "grad_norm": 0.7026138305664062,
+      "learning_rate": 0.00014189999999999998,
+      "loss": 0.6444,
+      "step": 475
+    },
+    {
+      "epoch": 0.19696671262556628,
+      "grad_norm": 0.996733546257019,
+      "learning_rate": 0.0001494,
+      "loss": 0.5615,
+      "step": 500
+    },
+    {
+      "epoch": 0.19696671262556628,
+      "eval_cer": 0.2082246549262256,
+      "eval_loss": 0.6706342697143555,
+      "eval_runtime": 163.1895,
+      "eval_samples_per_second": 7.507,
+      "eval_steps_per_second": 0.944,
+      "eval_wer": 0.7328846969054684,
+      "step": 500
+    },
+    {
+      "epoch": 0.2068150482568446,
+      "grad_norm": 0.7681376338005066,
+      "learning_rate": 0.0001569,
+      "loss": 0.5653,
+      "step": 525
+    },
+    {
+      "epoch": 0.2166633838881229,
+      "grad_norm": 0.9657942056655884,
+      "learning_rate": 0.0001644,
+      "loss": 0.5054,
+      "step": 550
+    },
+    {
+      "epoch": 0.22651171951940122,
+      "grad_norm": 0.6645617485046387,
+      "learning_rate": 0.00017189999999999998,
+      "loss": 0.4903,
+      "step": 575
+    },
+    {
+      "epoch": 0.23636005515067954,
+      "grad_norm": 0.8374491930007935,
+      "learning_rate": 0.00017939999999999997,
+      "loss": 0.4468,
+      "step": 600
+    },
+    {
+      "epoch": 0.24620839078195786,
+      "grad_norm": 0.634929895401001,
+      "learning_rate": 0.0001869,
+      "loss": 0.5071,
+      "step": 625
+    },
+    {
+      "epoch": 0.2560567264132362,
+      "grad_norm": 1.10395348072052,
+      "learning_rate": 0.00019439999999999998,
+      "loss": 0.4589,
+      "step": 650
+    },
+    {
+      "epoch": 0.2659050620445145,
+      "grad_norm": 0.6340622305870056,
+      "learning_rate": 0.0002019,
+      "loss": 0.4443,
+      "step": 675
+    },
+    {
+      "epoch": 0.27575339767579277,
+      "grad_norm": 0.8145949244499207,
+      "learning_rate": 0.00020939999999999997,
+      "loss": 0.4012,
+      "step": 700
+    },
+    {
+      "epoch": 0.2856017333070711,
+      "grad_norm": 0.6671243906021118,
+      "learning_rate": 0.0002169,
+      "loss": 0.426,
+      "step": 725
+    },
+    {
+      "epoch": 0.2954500689383494,
+      "grad_norm": 0.9220252633094788,
+      "learning_rate": 0.00022439999999999998,
+      "loss": 0.3847,
+      "step": 750
+    },
+    {
+      "epoch": 0.30529840456962776,
+      "grad_norm": 0.565290093421936,
+      "learning_rate": 0.0002319,
+      "loss": 0.4275,
+      "step": 775
+    },
+    {
+      "epoch": 0.31514674020090605,
+      "grad_norm": 0.9773520231246948,
+      "learning_rate": 0.0002394,
+      "loss": 0.4157,
+      "step": 800
+    },
+    {
+      "epoch": 0.32499507583218434,
+      "grad_norm": 0.46723672747612,
+      "learning_rate": 0.0002469,
+      "loss": 0.3753,
+      "step": 825
+    },
+    {
+      "epoch": 0.3348434114634627,
+      "grad_norm": 0.8790475726127625,
+      "learning_rate": 0.00025439999999999995,
+      "loss": 0.3772,
+      "step": 850
+    },
+    {
+      "epoch": 0.344691747094741,
+      "grad_norm": 0.5482104420661926,
+      "learning_rate": 0.00026189999999999997,
+      "loss": 0.5094,
+      "step": 875
+    },
+    {
+      "epoch": 0.3545400827260193,
+      "grad_norm": 0.6486608982086182,
+      "learning_rate": 0.0002694,
+      "loss": 0.3437,
+      "step": 900
+    },
+    {
+      "epoch": 0.3643884183572976,
+      "grad_norm": 0.5524376034736633,
+      "learning_rate": 0.0002769,
+      "loss": 0.3633,
+      "step": 925
+    },
+    {
+      "epoch": 0.3742367539885759,
+      "grad_norm": 0.8503928184509277,
+      "learning_rate": 0.0002844,
+      "loss": 0.3734,
+      "step": 950
+    },
+    {
+      "epoch": 0.38408508961985427,
+      "grad_norm": 0.46665817499160767,
+      "learning_rate": 0.0002919,
+      "loss": 0.4187,
+      "step": 975
+    },
+    {
+      "epoch": 0.39393342525113256,
+      "grad_norm": 0.6875209808349609,
+      "learning_rate": 0.00029939999999999996,
+      "loss": 0.3606,
+      "step": 1000
+    },
+    {
+      "epoch": 0.39393342525113256,
+      "eval_cer": 0.17860066634935745,
+      "eval_loss": 0.5461390018463135,
+      "eval_runtime": 163.7996,
+      "eval_samples_per_second": 7.479,
+      "eval_steps_per_second": 0.94,
+      "eval_wer": 0.635173802458669,
+      "step": 1000
+    },
+    {
+      "epoch": 0.40378176088241086,
+      "grad_norm": 0.5352048277854919,
+      "learning_rate": 0.00029995438921205707,
+      "loss": 0.3672,
+      "step": 1025
+    },
+    {
+      "epoch": 0.4136300965136892,
+      "grad_norm": 0.8251035213470459,
+      "learning_rate": 0.0002999048122686409,
+      "loss": 0.339,
+      "step": 1050
+    },
+    {
+      "epoch": 0.4234784321449675,
+      "grad_norm": 0.3810572326183319,
+      "learning_rate": 0.0002998552353252247,
+      "loss": 0.3476,
+      "step": 1075
+    },
+    {
+      "epoch": 0.4333267677762458,
+      "grad_norm": 0.7540473341941833,
+      "learning_rate": 0.00029980565838180854,
+      "loss": 0.3312,
+      "step": 1100
+    },
+    {
+      "epoch": 0.44317510340752414,
+      "grad_norm": 0.4706610143184662,
+      "learning_rate": 0.00029975608143839236,
+      "loss": 0.3824,
+      "step": 1125
+    },
+    {
+      "epoch": 0.45302343903880243,
+      "grad_norm": 0.8626775145530701,
+      "learning_rate": 0.0002997065044949762,
+      "loss": 0.3151,
+      "step": 1150
+    },
+    {
+      "epoch": 0.4628717746700808,
+      "grad_norm": 0.49395477771759033,
+      "learning_rate": 0.00029965692755156,
+      "loss": 0.329,
+      "step": 1175
+    },
+    {
+      "epoch": 0.4727201103013591,
+      "grad_norm": 0.7644901871681213,
+      "learning_rate": 0.00029960735060814383,
+      "loss": 0.3286,
+      "step": 1200
+    },
+    {
+      "epoch": 0.48256844593263737,
+      "grad_norm": 0.4805561304092407,
+      "learning_rate": 0.0002995577736647276,
+      "loss": 0.3309,
+      "step": 1225
+    },
+    {
+      "epoch": 0.4924167815639157,
+      "grad_norm": 0.8837474584579468,
+      "learning_rate": 0.0002995081967213115,
+      "loss": 0.2904,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5022651171951941,
+      "grad_norm": 0.4018630087375641,
+      "learning_rate": 0.00029945861977789524,
+      "loss": 0.3672,
+      "step": 1275
+    },
+    {
+      "epoch": 0.5121134528264724,
+      "grad_norm": 0.7346324324607849,
+      "learning_rate": 0.0002994090428344791,
+      "loss": 0.2945,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5219617884577507,
+      "grad_norm": 0.40217846632003784,
+      "learning_rate": 0.0002993594658910629,
+      "loss": 0.3403,
+      "step": 1325
+    },
+    {
+      "epoch": 0.531810124089029,
+      "grad_norm": 1.1682546138763428,
+      "learning_rate": 0.0002993098889476467,
+      "loss": 0.3214,
+      "step": 1350
+    },
+    {
+      "epoch": 0.5416584597203072,
+      "grad_norm": 0.5751331448554993,
+      "learning_rate": 0.00029926031200423053,
+      "loss": 0.3351,
+      "step": 1375
+    },
+    {
+      "epoch": 0.5515067953515855,
+      "grad_norm": 0.6926270127296448,
+      "learning_rate": 0.00029921073506081436,
+      "loss": 0.2982,
+      "step": 1400
+    },
+    {
+      "epoch": 0.5613551309828639,
+      "grad_norm": 0.48034876585006714,
+      "learning_rate": 0.0002991611581173982,
+      "loss": 0.3248,
+      "step": 1425
+    },
+    {
+      "epoch": 0.5712034666141422,
+      "grad_norm": 0.9431607127189636,
+      "learning_rate": 0.000299111581173982,
+      "loss": 0.3203,
+      "step": 1450
+    },
+    {
+      "epoch": 0.5810518022454205,
+      "grad_norm": 0.4477253258228302,
+      "learning_rate": 0.0002990620042305658,
+      "loss": 0.4062,
+      "step": 1475
+    },
+    {
+      "epoch": 0.5909001378766988,
+      "grad_norm": 0.7116991877555847,
+      "learning_rate": 0.00029901242728714965,
+      "loss": 0.3103,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5909001378766988,
+      "eval_cer": 0.16950023798191338,
+      "eval_loss": 0.49604567885398865,
+      "eval_runtime": 162.9792,
+      "eval_samples_per_second": 7.516,
+      "eval_steps_per_second": 0.945,
+      "eval_wer": 0.602585841458245,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6007484735079771,
+      "grad_norm": 0.6022364497184753,
+      "learning_rate": 0.00029896285034373347,
+      "loss": 0.2932,
+      "step": 1525
+    },
+    {
+      "epoch": 0.6105968091392555,
+      "grad_norm": 0.5500746369361877,
+      "learning_rate": 0.00029891327340031724,
+      "loss": 0.2917,
+      "step": 1550
+    },
+    {
+      "epoch": 0.6204451447705338,
+      "grad_norm": 0.4216461181640625,
+      "learning_rate": 0.00029886369645690106,
+      "loss": 0.3091,
+      "step": 1575
+    },
+    {
+      "epoch": 0.6302934804018121,
+      "grad_norm": 0.579009473323822,
+      "learning_rate": 0.0002988141195134849,
+      "loss": 0.2846,
+      "step": 1600
+    },
+    {
+      "epoch": 0.6401418160330904,
+      "grad_norm": 0.4677256941795349,
+      "learning_rate": 0.0002987645425700687,
+      "loss": 0.2971,
+      "step": 1625
+    },
+    {
+      "epoch": 0.6499901516643687,
+      "grad_norm": 0.8396487236022949,
+      "learning_rate": 0.00029871496562665253,
+      "loss": 0.2865,
+      "step": 1650
+    },
+    {
+      "epoch": 0.659838487295647,
+      "grad_norm": 0.45849570631980896,
+      "learning_rate": 0.00029866538868323635,
+      "loss": 0.3084,
+      "step": 1675
+    },
+    {
+      "epoch": 0.6696868229269254,
+      "grad_norm": 0.44411608576774597,
+      "learning_rate": 0.0002986158117398202,
+      "loss": 0.2729,
+      "step": 1700
+    },
+    {
+      "epoch": 0.6795351585582037,
+      "grad_norm": 0.4640048146247864,
+      "learning_rate": 0.000298566234796404,
+      "loss": 0.28,
+      "step": 1725
+    },
+    {
+      "epoch": 0.689383494189482,
+      "grad_norm": 0.6684908866882324,
+      "learning_rate": 0.00029851665785298777,
+      "loss": 0.2853,
+      "step": 1750
+    },
+    {
+      "epoch": 0.6992318298207603,
+      "grad_norm": 0.38581475615501404,
+      "learning_rate": 0.00029846708090957165,
+      "loss": 0.3043,
+      "step": 1775
+    },
+    {
+      "epoch": 0.7090801654520386,
+      "grad_norm": 0.7165818810462952,
+      "learning_rate": 0.0002984175039661554,
+      "loss": 0.2659,
+      "step": 1800
+    },
+    {
+      "epoch": 0.718928501083317,
+      "grad_norm": 0.47936758399009705,
+      "learning_rate": 0.0002983679270227393,
+      "loss": 0.3179,
+      "step": 1825
+    },
+    {
+      "epoch": 0.7287768367145953,
+      "grad_norm": 0.8553590178489685,
+      "learning_rate": 0.00029831835007932306,
+      "loss": 0.2658,
+      "step": 1850
+    },
+    {
+      "epoch": 0.7386251723458735,
+      "grad_norm": 0.8372988104820251,
+      "learning_rate": 0.00029826877313590694,
+      "loss": 0.3262,
+      "step": 1875
+    },
+    {
+      "epoch": 0.7484735079771518,
+      "grad_norm": 0.6207088232040405,
+      "learning_rate": 0.0002982191961924907,
+      "loss": 0.2492,
+      "step": 1900
+    },
+    {
+      "epoch": 0.7583218436084301,
+      "grad_norm": 0.41880714893341064,
+      "learning_rate": 0.00029816961924907453,
+      "loss": 0.2913,
+      "step": 1925
+    },
+    {
+      "epoch": 0.7681701792397085,
+      "grad_norm": 0.5808923840522766,
+      "learning_rate": 0.00029812004230565835,
+      "loss": 0.2747,
+      "step": 1950
+    },
+    {
+      "epoch": 0.7780185148709868,
+      "grad_norm": 0.41703030467033386,
+      "learning_rate": 0.0002980704653622422,
+      "loss": 0.2835,
+      "step": 1975
+    },
+    {
+      "epoch": 0.7878668505022651,
+      "grad_norm": 0.5971771478652954,
+      "learning_rate": 0.000298020888418826,
+      "loss": 0.228,
+      "step": 2000
+    },
+    {
+      "epoch": 0.7878668505022651,
+      "eval_cer": 0.16345549738219894,
+      "eval_loss": 0.5086885094642639,
+      "eval_runtime": 163.7155,
+      "eval_samples_per_second": 7.482,
+      "eval_steps_per_second": 0.941,
+      "eval_wer": 0.5916701992369648,
+      "step": 2000
+    },
+    {
+      "epoch": 0.7977151861335434,
+      "grad_norm": 0.39887216687202454,
+      "learning_rate": 0.0002979713114754098,
+      "loss": 0.2978,
+      "step": 2025
+    },
+    {
+      "epoch": 0.8075635217648217,
+      "grad_norm": 0.6058250665664673,
+      "learning_rate": 0.00029792173453199364,
+      "loss": 0.258,
+      "step": 2050
+    },
+    {
+      "epoch": 0.8174118573961,
+      "grad_norm": 0.37628692388534546,
+      "learning_rate": 0.00029787215758857747,
+      "loss": 0.2648,
+      "step": 2075
+    },
+    {
+      "epoch": 0.8272601930273784,
+      "grad_norm": 0.6003425121307373,
+      "learning_rate": 0.0002978225806451613,
+      "loss": 0.2536,
+      "step": 2100
+    },
+    {
+      "epoch": 0.8371085286586567,
+      "grad_norm": 0.3735896348953247,
+      "learning_rate": 0.00029777300370174506,
+      "loss": 0.2853,
+      "step": 2125
+    },
+    {
+      "epoch": 0.846956864289935,
+      "grad_norm": 0.6161431074142456,
+      "learning_rate": 0.0002977234267583289,
+      "loss": 0.2388,
+      "step": 2150
+    },
+    {
+      "epoch": 0.8568051999212133,
+      "grad_norm": 0.4847399890422821,
+      "learning_rate": 0.0002976738498149127,
+      "loss": 0.2982,
+      "step": 2175
+    },
+    {
+      "epoch": 0.8666535355524916,
+      "grad_norm": 0.5926830172538757,
+      "learning_rate": 0.0002976242728714965,
+      "loss": 0.2716,
+      "step": 2200
+    },
+    {
+      "epoch": 0.87650187118377,
+      "grad_norm": 0.445588618516922,
+      "learning_rate": 0.00029757469592808035,
+      "loss": 0.269,
+      "step": 2225
+    },
+    {
+      "epoch": 0.8863502068150483,
+      "grad_norm": 0.7648467421531677,
+      "learning_rate": 0.00029752511898466417,
+      "loss": 0.2669,
+      "step": 2250
+    },
+    {
+      "epoch": 0.8961985424463266,
+      "grad_norm": 0.32576897740364075,
+      "learning_rate": 0.000297475542041248,
+      "loss": 0.2948,
+      "step": 2275
+    },
+    {
+      "epoch": 0.9060468780776049,
+      "grad_norm": 0.63624507188797,
+      "learning_rate": 0.0002974259650978318,
+      "loss": 0.2242,
+      "step": 2300
+    },
+    {
+      "epoch": 0.9158952137088832,
+      "grad_norm": 0.5524712204933167,
+      "learning_rate": 0.00029737638815441564,
+      "loss": 0.2999,
+      "step": 2325
+    },
+    {
+      "epoch": 0.9257435493401616,
+      "grad_norm": 1.4110374450683594,
+      "learning_rate": 0.00029732681121099946,
+      "loss": 0.2717,
+      "step": 2350
+    },
+    {
+      "epoch": 0.9355918849714399,
+      "grad_norm": 0.3871447443962097,
+      "learning_rate": 0.00029727723426758323,
+      "loss": 0.3137,
+      "step": 2375
+    },
+    {
+      "epoch": 0.9454402206027182,
+      "grad_norm": 0.5803717970848083,
+      "learning_rate": 0.0002972276573241671,
+      "loss": 0.2636,
+      "step": 2400
+    },
+    {
+      "epoch": 0.9552885562339964,
+      "grad_norm": 0.43855154514312744,
+      "learning_rate": 0.0002971780803807509,
+      "loss": 0.2824,
+      "step": 2425
+    },
+    {
+      "epoch": 0.9651368918652747,
+      "grad_norm": 0.557528018951416,
+      "learning_rate": 0.00029712850343733475,
+      "loss": 0.2325,
+      "step": 2450
+    },
+    {
+      "epoch": 0.974985227496553,
+      "grad_norm": 0.4475504457950592,
+      "learning_rate": 0.0002970789264939185,
+      "loss": 0.2684,
+      "step": 2475
+    },
+    {
+      "epoch": 0.9848335631278314,
+      "grad_norm": 0.5186614990234375,
+      "learning_rate": 0.00029702934955050234,
+      "loss": 0.2502,
+      "step": 2500
+    },
+    {
+      "epoch": 0.9848335631278314,
+      "eval_cer": 0.16478819609709663,
+      "eval_loss": 0.47200512886047363,
+      "eval_runtime": 163.5041,
+      "eval_samples_per_second": 7.492,
+      "eval_steps_per_second": 0.942,
+      "eval_wer": 0.5835099618482408,
+      "step": 2500
+    },
+    {
+      "epoch": 0.9946818987591097,
+      "grad_norm": 0.41672027111053467,
+      "learning_rate": 0.00029697977260708617,
+      "loss": 0.2629,
+      "step": 2525
+    },
+    {
+      "epoch": 1.0043332676777625,
+      "grad_norm": 0.3539595901966095,
+      "learning_rate": 0.00029693019566367,
+      "loss": 0.273,
+      "step": 2550
+    },
+    {
+      "epoch": 1.0141816033090407,
+      "grad_norm": 0.4776741564273834,
+      "learning_rate": 0.0002968806187202538,
+      "loss": 0.2057,
+      "step": 2575
+    },
+    {
+      "epoch": 1.0240299389403191,
+      "grad_norm": 0.5060381889343262,
+      "learning_rate": 0.00029683104177683764,
+      "loss": 0.3886,
+      "step": 2600
+    },
+    {
+      "epoch": 1.0338782745715973,
+      "grad_norm": 0.387074738740921,
+      "learning_rate": 0.00029678146483342146,
+      "loss": 0.1943,
+      "step": 2625
+    },
+    {
+      "epoch": 1.0437266102028757,
+      "grad_norm": 0.3543827533721924,
+      "learning_rate": 0.0002967318878900053,
+      "loss": 0.2544,
+      "step": 2650
+    },
+    {
+      "epoch": 1.0535749458341541,
+      "grad_norm": 0.4274260103702545,
+      "learning_rate": 0.0002966823109465891,
+      "loss": 0.1986,
+      "step": 2675
+    },
+    {
+      "epoch": 1.0634232814654323,
+      "grad_norm": 0.37364107370376587,
+      "learning_rate": 0.00029663273400317293,
+      "loss": 0.2713,
+      "step": 2700
+    },
+    {
+      "epoch": 1.0732716170967107,
+      "grad_norm": 0.5992911458015442,
+      "learning_rate": 0.0002965831570597567,
+      "loss": 0.1983,
+      "step": 2725
+    },
+    {
+      "epoch": 1.0831199527279889,
+      "grad_norm": 0.4560203552246094,
+      "learning_rate": 0.0002965335801163405,
+      "loss": 0.264,
+      "step": 2750
+    },
+    {
+      "epoch": 1.0929682883592673,
+      "grad_norm": 0.4546229839324951,
+      "learning_rate": 0.00029648400317292434,
+      "loss": 0.1982,
+      "step": 2775
+    },
+    {
+      "epoch": 1.1028166239905457,
+      "grad_norm": 0.39508143067359924,
+      "learning_rate": 0.00029643442622950816,
+      "loss": 0.261,
+      "step": 2800
+    },
+    {
+      "epoch": 1.1126649596218239,
+      "grad_norm": 0.527611255645752,
+      "learning_rate": 0.000296384849286092,
+      "loss": 0.1981,
+      "step": 2825
+    },
+    {
+      "epoch": 1.1225132952531023,
+      "grad_norm": 0.3761278986930847,
+      "learning_rate": 0.0002963352723426758,
+      "loss": 0.2404,
+      "step": 2850
+    },
+    {
+      "epoch": 1.1323616308843805,
+      "grad_norm": 0.35845261812210083,
+      "learning_rate": 0.00029628569539925963,
+      "loss": 0.1828,
+      "step": 2875
+    },
+    {
+      "epoch": 1.1422099665156589,
+      "grad_norm": 0.38187527656555176,
+      "learning_rate": 0.00029623611845584346,
+      "loss": 0.2428,
+      "step": 2900
+    },
+    {
+      "epoch": 1.152058302146937,
+      "grad_norm": 0.44596055150032043,
+      "learning_rate": 0.0002961865415124273,
+      "loss": 0.1932,
+      "step": 2925
+    },
+    {
+      "epoch": 1.1619066377782155,
+      "grad_norm": 0.33598092198371887,
+      "learning_rate": 0.00029613696456901105,
+      "loss": 0.2846,
+      "step": 2950
+    },
+    {
+      "epoch": 1.1717549734094939,
+      "grad_norm": 0.5181595683097839,
+      "learning_rate": 0.0002960873876255949,
+      "loss": 0.2085,
+      "step": 2975
+    },
+    {
+      "epoch": 1.181603309040772,
+      "grad_norm": 0.4908069372177124,
+      "learning_rate": 0.0002960378106821787,
+      "loss": 0.2841,
+      "step": 3000
+    },
+    {
+      "epoch": 1.181603309040772,
+      "eval_cer": 0.1700523560209424,
+      "eval_loss": 0.46515917778015137,
+      "eval_runtime": 163.0519,
+      "eval_samples_per_second": 7.513,
+      "eval_steps_per_second": 0.944,
+      "eval_wer": 0.6019499788045782,
+      "step": 3000
+    },
+    {
+      "epoch": 1.1914516446720504,
+      "grad_norm": 0.4038330912590027,
+      "learning_rate": 0.00029598823373876257,
+      "loss": 0.2072,
+      "step": 3025
+    },
+    {
+      "epoch": 1.2012999803033289,
+      "grad_norm": 0.2689180374145508,
+      "learning_rate": 0.00029593865679534634,
+      "loss": 0.2815,
+      "step": 3050
+    },
+    {
+      "epoch": 1.211148315934607,
+      "grad_norm": 0.55915766954422,
+      "learning_rate": 0.00029588907985193016,
+      "loss": 0.1969,
+      "step": 3075
+    },
+    {
+      "epoch": 1.2209966515658854,
+      "grad_norm": 0.5152351260185242,
+      "learning_rate": 0.000295839502908514,
+      "loss": 0.2563,
+      "step": 3100
+    },
+    {
+      "epoch": 1.2308449871971636,
+      "grad_norm": 0.4780251681804657,
+      "learning_rate": 0.0002957899259650978,
+      "loss": 0.1865,
+      "step": 3125
+    },
+    {
+      "epoch": 1.240693322828442,
+      "grad_norm": 0.3721919357776642,
+      "learning_rate": 0.00029574034902168163,
+      "loss": 0.2804,
+      "step": 3150
+    },
+    {
+      "epoch": 1.2505416584597202,
+      "grad_norm": 0.33008575439453125,
+      "learning_rate": 0.00029569077207826545,
+      "loss": 0.2067,
+      "step": 3175
+    },
+    {
+      "epoch": 1.2603899940909986,
+      "grad_norm": 0.5002256035804749,
+      "learning_rate": 0.0002956411951348493,
+      "loss": 0.2868,
+      "step": 3200
+    },
+    {
+      "epoch": 1.270238329722277,
+      "grad_norm": 1.328635573387146,
+      "learning_rate": 0.0002955916181914331,
+      "loss": 0.2323,
+      "step": 3225
+    },
+    {
+      "epoch": 1.2800866653535552,
+      "grad_norm": 0.3654369115829468,
+      "learning_rate": 0.00029554204124801687,
+      "loss": 0.2363,
+      "step": 3250
+    },
+    {
+      "epoch": 1.2899350009848336,
+      "grad_norm": 0.468279629945755,
+      "learning_rate": 0.00029549246430460074,
+      "loss": 0.1716,
+      "step": 3275
+    },
+    {
+      "epoch": 1.299783336616112,
+      "grad_norm": 0.3845861256122589,
+      "learning_rate": 0.0002954428873611845,
+      "loss": 0.2792,
+      "step": 3300
+    },
+    {
+      "epoch": 1.3096316722473902,
+      "grad_norm": 0.40870147943496704,
+      "learning_rate": 0.00029539331041776834,
+      "loss": 0.1794,
+      "step": 3325
+    },
+    {
+      "epoch": 1.3194800078786684,
+      "grad_norm": 0.5596709847450256,
+      "learning_rate": 0.00029534373347435216,
+      "loss": 0.2471,
+      "step": 3350
+    },
+    {
+      "epoch": 1.3293283435099468,
+      "grad_norm": 0.5225990414619446,
+      "learning_rate": 0.000295294156530936,
+      "loss": 0.1847,
+      "step": 3375
+    },
+    {
+      "epoch": 1.3391766791412252,
+      "grad_norm": 0.2951211929321289,
+      "learning_rate": 0.0002952445795875198,
+      "loss": 0.2149,
+      "step": 3400
+    },
+    {
+      "epoch": 1.3490250147725034,
+      "grad_norm": 0.7100874185562134,
+      "learning_rate": 0.0002951950026441036,
+      "loss": 0.197,
+      "step": 3425
+    },
+    {
+      "epoch": 1.3588733504037818,
+      "grad_norm": 0.4289153218269348,
+      "learning_rate": 0.00029514542570068745,
+      "loss": 0.259,
+      "step": 3450
+    },
+    {
+      "epoch": 1.3687216860350602,
+      "grad_norm": 0.9042884707450867,
+      "learning_rate": 0.00029509584875727127,
+      "loss": 0.1585,
+      "step": 3475
+    },
+    {
+      "epoch": 1.3785700216663384,
+      "grad_norm": 0.3851844072341919,
+      "learning_rate": 0.0002950462718138551,
+      "loss": 0.2603,
+      "step": 3500
+    },
+    {
+      "epoch": 1.3785700216663384,
+      "eval_cer": 0.15875297477391717,
+      "eval_loss": 0.4725710451602936,
+      "eval_runtime": 163.6264,
+      "eval_samples_per_second": 7.487,
+      "eval_steps_per_second": 0.941,
+      "eval_wer": 0.5685142009325985,
+      "step": 3500
+    },
+    {
+      "epoch": 1.3884183572976168,
+      "grad_norm": 0.4688514769077301,
+      "learning_rate": 0.00029499669487043886,
+      "loss": 0.1828,
+      "step": 3525
+    },
+    {
+      "epoch": 1.398266692928895,
+      "grad_norm": 0.3662601709365845,
+      "learning_rate": 0.00029494711792702274,
+      "loss": 0.263,
+      "step": 3550
+    },
+    {
+      "epoch": 1.4081150285601733,
+      "grad_norm": 0.8322161436080933,
+      "learning_rate": 0.0002948975409836065,
+      "loss": 0.1968,
+      "step": 3575
+    },
+    {
+      "epoch": 1.4179633641914515,
+      "grad_norm": 0.6938912868499756,
+      "learning_rate": 0.0002948479640401904,
+      "loss": 0.2717,
+      "step": 3600
+    },
+    {
+      "epoch": 1.42781169982273,
+      "grad_norm": 0.6767099499702454,
+      "learning_rate": 0.00029479838709677416,
+      "loss": 0.1871,
+      "step": 3625
+    },
+    {
+      "epoch": 1.4376600354540083,
+      "grad_norm": 0.4281677305698395,
+      "learning_rate": 0.000294748810153358,
+      "loss": 0.2598,
+      "step": 3650
+    },
+    {
+      "epoch": 1.4475083710852865,
+      "grad_norm": 0.5565653443336487,
+      "learning_rate": 0.0002946992332099418,
+      "loss": 0.1913,
+      "step": 3675
+    },
+    {
+      "epoch": 1.457356706716565,
+      "grad_norm": 0.3767055869102478,
+      "learning_rate": 0.0002946496562665256,
+      "loss": 0.2536,
+      "step": 3700
+    },
+    {
+      "epoch": 1.4672050423478433,
+      "grad_norm": 0.5199496150016785,
+      "learning_rate": 0.00029460007932310945,
+      "loss": 0.1869,
+      "step": 3725
+    },
+    {
+      "epoch": 1.4770533779791215,
+      "grad_norm": 0.25990667939186096,
+      "learning_rate": 0.00029455050237969327,
+      "loss": 0.2672,
+      "step": 3750
+    },
+    {
+      "epoch": 1.4869017136104,
+      "grad_norm": 0.4943206310272217,
+      "learning_rate": 0.0002945009254362771,
+      "loss": 0.1842,
+      "step": 3775
+    },
+    {
+      "epoch": 1.496750049241678,
+      "grad_norm": 0.3140566051006317,
+      "learning_rate": 0.0002944513484928609,
+      "loss": 0.2321,
+      "step": 3800
+    },
+    {
+      "epoch": 1.5065983848729565,
+      "grad_norm": 0.47428634762763977,
+      "learning_rate": 0.0002944017715494447,
+      "loss": 0.1781,
+      "step": 3825
+    },
+    {
+      "epoch": 1.5164467205042347,
+      "grad_norm": 0.33789676427841187,
+      "learning_rate": 0.00029435219460602856,
+      "loss": 0.2895,
+      "step": 3850
+    },
+    {
+      "epoch": 1.526295056135513,
+      "grad_norm": 0.5449649691581726,
+      "learning_rate": 0.00029430261766261233,
+      "loss": 0.1773,
+      "step": 3875
+    },
+    {
+      "epoch": 1.5361433917667915,
+      "grad_norm": 0.3018980324268341,
+      "learning_rate": 0.00029425304071919615,
+      "loss": 0.2369,
+      "step": 3900
+    },
+    {
+      "epoch": 1.5459917273980697,
+      "grad_norm": 0.39579182863235474,
+      "learning_rate": 0.00029420346377578,
+      "loss": 0.1831,
+      "step": 3925
+    },
+    {
+      "epoch": 1.555840063029348,
+      "grad_norm": 0.31681591272354126,
+      "learning_rate": 0.0002941538868323638,
+      "loss": 0.2517,
+      "step": 3950
+    },
+    {
+      "epoch": 1.5656883986606265,
+      "grad_norm": 0.5165658593177795,
+      "learning_rate": 0.0002941043098889476,
+      "loss": 0.1675,
+      "step": 3975
+    },
+    {
+      "epoch": 1.5755367342919047,
+      "grad_norm": 0.3655818700790405,
+      "learning_rate": 0.00029405473294553144,
+      "loss": 0.2538,
+      "step": 4000
+    },
+    {
+      "epoch": 1.5755367342919047,
+      "eval_cer": 0.15729652546406472,
+      "eval_loss": 0.45375847816467285,
+      "eval_runtime": 162.5495,
+      "eval_samples_per_second": 7.536,
+      "eval_steps_per_second": 0.947,
+      "eval_wer": 0.5666596015260704,
+      "step": 4000
+    },
+    {
+      "epoch": 1.5853850699231828,
+      "grad_norm": 0.43671900033950806,
+      "learning_rate": 0.00029400515600211527,
+      "loss": 0.1934,
+      "step": 4025
+    },
+    {
+      "epoch": 1.5952334055544612,
+      "grad_norm": 0.39044424891471863,
+      "learning_rate": 0.0002939555790586991,
+      "loss": 0.2838,
+      "step": 4050
+    },
+    {
+      "epoch": 1.6050817411857397,
+      "grad_norm": 0.5004962086677551,
+      "learning_rate": 0.0002939060021152829,
+      "loss": 0.1721,
+      "step": 4075
+    },
+    {
+      "epoch": 1.6149300768170178,
+      "grad_norm": 0.3070147931575775,
+      "learning_rate": 0.0002938564251718667,
+      "loss": 0.2792,
+      "step": 4100
+    },
+    {
+      "epoch": 1.6247784124482962,
+      "grad_norm": 0.3623579740524292,
+      "learning_rate": 0.00029380684822845056,
+      "loss": 0.1816,
+      "step": 4125
+    },
+    {
+      "epoch": 1.6346267480795746,
+      "grad_norm": 0.32997018098831177,
+      "learning_rate": 0.0002937572712850343,
+      "loss": 0.2707,
+      "step": 4150
+    },
+    {
+      "epoch": 1.6444750837108528,
+      "grad_norm": 0.3477202355861664,
+      "learning_rate": 0.00029370769434161815,
+      "loss": 0.2166,
+      "step": 4175
+    },
+    {
+      "epoch": 1.6543234193421312,
+      "grad_norm": 0.3085598647594452,
+      "learning_rate": 0.00029365811739820197,
+      "loss": 0.2746,
+      "step": 4200
+    },
+    {
+      "epoch": 1.6641717549734096,
+      "grad_norm": 0.6120396852493286,
+      "learning_rate": 0.0002936085404547858,
+      "loss": 0.1782,
+      "step": 4225
+    },
+    {
+      "epoch": 1.6740200906046878,
+      "grad_norm": 0.4212220013141632,
+      "learning_rate": 0.0002935589635113696,
+      "loss": 0.2373,
+      "step": 4250
+    },
+    {
+      "epoch": 1.683868426235966,
+      "grad_norm": 0.5091245174407959,
+      "learning_rate": 0.00029350938656795344,
+      "loss": 0.1648,
+      "step": 4275
+    },
+    {
+      "epoch": 1.6937167618672444,
+      "grad_norm": 0.3296779692173004,
+      "learning_rate": 0.00029345980962453726,
+      "loss": 0.2734,
+      "step": 4300
+    },
+    {
+      "epoch": 1.7035650974985228,
+      "grad_norm": 0.2967052161693573,
+      "learning_rate": 0.0002934102326811211,
+      "loss": 0.1638,
+      "step": 4325
+    },
+    {
+      "epoch": 1.713413433129801,
+      "grad_norm": 0.34322819113731384,
+      "learning_rate": 0.0002933606557377049,
+      "loss": 0.222,
+      "step": 4350
+    },
+    {
+      "epoch": 1.7232617687610794,
+      "grad_norm": 0.2758971154689789,
+      "learning_rate": 0.00029331107879428873,
+      "loss": 0.1613,
+      "step": 4375
+    },
+    {
+      "epoch": 1.7331101043923578,
+      "grad_norm": 0.3527407944202423,
+      "learning_rate": 0.0002932615018508725,
+      "loss": 0.2606,
+      "step": 4400
+    },
+    {
+      "epoch": 1.742958440023636,
+      "grad_norm": 0.3466641902923584,
+      "learning_rate": 0.0002932119249074564,
+      "loss": 0.1706,
+      "step": 4425
+    },
+    {
+      "epoch": 1.7528067756549142,
+      "grad_norm": 0.34825262427330017,
+      "learning_rate": 0.00029316234796404015,
+      "loss": 0.2187,
+      "step": 4450
+    },
+    {
+      "epoch": 1.7626551112861928,
+      "grad_norm": 0.46970999240875244,
+      "learning_rate": 0.00029311277102062397,
+      "loss": 0.1874,
+      "step": 4475
+    },
+    {
+      "epoch": 1.772503446917471,
+      "grad_norm": 0.8190127015113831,
+      "learning_rate": 0.0002930631940772078,
+      "loss": 0.2215,
+      "step": 4500
+    },
+    {
+      "epoch": 1.772503446917471,
+      "eval_cer": 0.15814374107567825,
+      "eval_loss": 0.4648754298686981,
+      "eval_runtime": 160.5904,
+      "eval_samples_per_second": 7.628,
+      "eval_steps_per_second": 0.959,
+      "eval_wer": 0.5615197117422637,
+      "step": 4500
+    },
+    {
+      "epoch": 1.7823517825487492,
+      "grad_norm": 0.651522159576416,
+      "learning_rate": 0.0002930136171337916,
+      "loss": 0.1959,
+      "step": 4525
+    },
+    {
+      "epoch": 1.7922001181800276,
+      "grad_norm": 0.29321911931037903,
+      "learning_rate": 0.00029296404019037544,
+      "loss": 0.2361,
+      "step": 4550
+    },
+    {
+      "epoch": 1.802048453811306,
+      "grad_norm": 0.5241197943687439,
+      "learning_rate": 0.00029291446324695926,
+      "loss": 0.2164,
+      "step": 4575
+    },
+    {
+      "epoch": 1.8118967894425841,
+      "grad_norm": 0.39373457431793213,
+      "learning_rate": 0.0002928648863035431,
+      "loss": 0.2354,
+      "step": 4600
+    },
+    {
+      "epoch": 1.8217451250738625,
+      "grad_norm": 0.4190231263637543,
+      "learning_rate": 0.0002928153093601269,
+      "loss": 0.1639,
+      "step": 4625
+    },
+    {
+      "epoch": 1.831593460705141,
+      "grad_norm": 0.29849475622177124,
+      "learning_rate": 0.00029276573241671073,
+      "loss": 0.2495,
+      "step": 4650
+    },
+    {
+      "epoch": 1.8414417963364191,
+      "grad_norm": 0.4116981625556946,
+      "learning_rate": 0.0002927161554732945,
+      "loss": 0.1705,
+      "step": 4675
+    },
+    {
+      "epoch": 1.8512901319676973,
+      "grad_norm": 0.2837681472301483,
+      "learning_rate": 0.0002926665785298784,
+      "loss": 0.2358,
+      "step": 4700
+    },
+    {
+      "epoch": 1.8611384675989757,
+      "grad_norm": 0.41158556938171387,
+      "learning_rate": 0.00029261700158646214,
+      "loss": 0.1573,
+      "step": 4725
+    },
+    {
+      "epoch": 1.8709868032302541,
+      "grad_norm": 0.2807323932647705,
+      "learning_rate": 0.00029256742464304597,
+      "loss": 0.2292,
+      "step": 4750
+    },
+    {
+      "epoch": 1.8808351388615323,
+      "grad_norm": 0.4338144361972809,
+      "learning_rate": 0.0002925178476996298,
+      "loss": 0.2213,
+      "step": 4775
+    },
+    {
+      "epoch": 1.8906834744928107,
+      "grad_norm": 0.27579835057258606,
+      "learning_rate": 0.0002924682707562136,
+      "loss": 0.2297,
+      "step": 4800
+    },
+    {
+      "epoch": 1.9005318101240891,
+      "grad_norm": 0.3741152286529541,
+      "learning_rate": 0.00029241869381279743,
+      "loss": 0.1735,
+      "step": 4825
+    },
+    {
+      "epoch": 1.9103801457553673,
+      "grad_norm": 0.2773807942867279,
+      "learning_rate": 0.00029236911686938126,
+      "loss": 0.2162,
+      "step": 4850
+    },
+    {
+      "epoch": 1.9202284813866457,
+      "grad_norm": 0.32655900716781616,
+      "learning_rate": 0.0002923195399259651,
+      "loss": 0.1632,
+      "step": 4875
+    },
+    {
+      "epoch": 1.930076817017924,
+      "grad_norm": 0.5068430304527283,
+      "learning_rate": 0.0002922699629825489,
+      "loss": 0.2551,
+      "step": 4900
+    },
+    {
+      "epoch": 1.9399251526492023,
+      "grad_norm": 0.39267122745513916,
+      "learning_rate": 0.00029222038603913267,
+      "loss": 0.194,
+      "step": 4925
+    },
+    {
+      "epoch": 1.9497734882804805,
+      "grad_norm": 0.37297454476356506,
+      "learning_rate": 0.00029217080909571655,
+      "loss": 0.2512,
+      "step": 4950
+    },
+    {
+      "epoch": 1.9596218239117589,
+      "grad_norm": 0.33435386419296265,
+      "learning_rate": 0.0002921212321523003,
+      "loss": 0.1674,
+      "step": 4975
+    },
+    {
+      "epoch": 1.9694701595430373,
+      "grad_norm": 0.45720335841178894,
+      "learning_rate": 0.0002920716552088842,
+      "loss": 0.2329,
+      "step": 5000
+    },
+    {
+      "epoch": 1.9694701595430373,
+      "eval_cer": 0.15485007139457402,
+      "eval_loss": 0.4407041072845459,
+      "eval_runtime": 161.0078,
+      "eval_samples_per_second": 7.608,
+      "eval_steps_per_second": 0.956,
+      "eval_wer": 0.5582874099194574,
+      "step": 5000
+    },
+    {
+      "epoch": 1.9793184951743155,
+      "grad_norm": 0.509185791015625,
+      "learning_rate": 0.00029202207826546796,
+      "loss": 0.1547,
+      "step": 5025
+    },
+    {
+      "epoch": 1.9891668308055939,
+      "grad_norm": 0.7026469707489014,
+      "learning_rate": 0.0002919725013220518,
+      "loss": 0.2667,
+      "step": 5050
+    },
+    {
+      "epoch": 1.9990151664368723,
+      "grad_norm": 0.48996204137802124,
+      "learning_rate": 0.0002919229243786356,
+      "loss": 0.186,
+      "step": 5075
+    },
+    {
+      "epoch": 2.008666535355525,
+      "grad_norm": 0.29709795117378235,
+      "learning_rate": 0.00029187334743521943,
+      "loss": 0.2121,
+      "step": 5100
+    },
+    {
+      "epoch": 2.0185148709868033,
+      "grad_norm": 0.6418112516403198,
+      "learning_rate": 0.00029182377049180325,
+      "loss": 0.1564,
+      "step": 5125
+    },
+    {
+      "epoch": 2.0283632066180814,
+      "grad_norm": 0.3827853500843048,
+      "learning_rate": 0.0002917741935483871,
+      "loss": 0.187,
+      "step": 5150
+    },
+    {
+      "epoch": 2.03821154224936,
+      "grad_norm": 0.3637414574623108,
+      "learning_rate": 0.0002917246166049709,
+      "loss": 0.1498,
+      "step": 5175
+    },
+    {
+      "epoch": 2.0480598778806383,
+      "grad_norm": 0.3048696219921112,
+      "learning_rate": 0.0002916750396615547,
+      "loss": 0.1954,
+      "step": 5200
+    },
+    {
+      "epoch": 2.0579082135119164,
+      "grad_norm": 0.4234159290790558,
+      "learning_rate": 0.00029162546271813855,
+      "loss": 0.1739,
+      "step": 5225
+    },
+    {
+      "epoch": 2.0677565491431946,
+      "grad_norm": 0.6576627492904663,
+      "learning_rate": 0.0002915758857747223,
+      "loss": 0.2164,
+      "step": 5250
+    },
+    {
+      "epoch": 2.0776048847744732,
+      "grad_norm": 0.5567516088485718,
+      "learning_rate": 0.0002915263088313062,
+      "loss": 0.1604,
+      "step": 5275
+    },
+    {
+      "epoch": 2.0874532204057514,
+      "grad_norm": 0.3496594727039337,
+      "learning_rate": 0.00029147673188788996,
+      "loss": 0.2653,
+      "step": 5300
+    },
+    {
+      "epoch": 2.0973015560370296,
+      "grad_norm": 0.544669508934021,
+      "learning_rate": 0.0002914271549444738,
+      "loss": 0.1378,
+      "step": 5325
+    },
+    {
+      "epoch": 2.1071498916683082,
+      "grad_norm": 0.39314302802085876,
+      "learning_rate": 0.0002913775780010576,
+      "loss": 0.2019,
+      "step": 5350
+    },
+    {
+      "epoch": 2.1169982272995864,
+      "grad_norm": 0.4864466190338135,
+      "learning_rate": 0.00029132800105764143,
+      "loss": 0.1509,
+      "step": 5375
+    },
+    {
+      "epoch": 2.1268465629308646,
+      "grad_norm": 0.3684788644313812,
+      "learning_rate": 0.00029127842411422525,
+      "loss": 0.2032,
+      "step": 5400
+    },
+    {
+      "epoch": 2.1366948985621432,
+      "grad_norm": 0.503184974193573,
+      "learning_rate": 0.0002912288471708091,
+      "loss": 0.164,
+      "step": 5425
+    },
+    {
+      "epoch": 2.1465432341934214,
+      "grad_norm": 0.28915172815322876,
+      "learning_rate": 0.0002911792702273929,
+      "loss": 0.3365,
+      "step": 5450
+    },
+    {
+      "epoch": 2.1563915698246996,
+      "grad_norm": 0.449964314699173,
+      "learning_rate": 0.0002911296932839767,
+      "loss": 0.1741,
+      "step": 5475
+    },
+    {
+      "epoch": 2.1662399054559778,
+      "grad_norm": 0.30920708179473877,
+      "learning_rate": 0.0002910801163405605,
+      "loss": 0.2111,
+      "step": 5500
+    },
+    {
+      "epoch": 2.1662399054559778,
+      "eval_cer": 0.15415516420752023,
+      "eval_loss": 0.4676578938961029,
+      "eval_runtime": 161.7616,
+      "eval_samples_per_second": 7.573,
+      "eval_steps_per_second": 0.952,
+      "eval_wer": 0.5528825773632895,
+      "step": 5500
+    },
+    {
+      "epoch": 2.1760882410872564,
+      "grad_norm": 0.4080190062522888,
+      "learning_rate": 0.00029103053939714437,
+      "loss": 0.1527,
+      "step": 5525
+    },
+    {
+      "epoch": 2.1859365767185346,
+      "grad_norm": 0.2396317571401596,
+      "learning_rate": 0.00029098096245372813,
+      "loss": 0.1957,
+      "step": 5550
+    },
+    {
+      "epoch": 2.1957849123498128,
+      "grad_norm": 0.5699915885925293,
+      "learning_rate": 0.000290931385510312,
+      "loss": 0.1405,
+      "step": 5575
+    },
+    {
+      "epoch": 2.2056332479810914,
+      "grad_norm": 0.20270849764347076,
+      "learning_rate": 0.0002908818085668958,
+      "loss": 0.2075,
+      "step": 5600
+    },
+    {
+      "epoch": 2.2154815836123696,
+      "grad_norm": 0.4552954435348511,
+      "learning_rate": 0.0002908322316234796,
+      "loss": 0.1705,
+      "step": 5625
+    },
+    {
+      "epoch": 2.2253299192436478,
+      "grad_norm": 0.23801960051059723,
+      "learning_rate": 0.0002907826546800634,
+      "loss": 0.1836,
+      "step": 5650
+    },
+    {
+      "epoch": 2.235178254874926,
+      "grad_norm": 0.349804550409317,
+      "learning_rate": 0.00029073307773664725,
+      "loss": 0.1499,
+      "step": 5675
+    },
+    {
+      "epoch": 2.2450265905062046,
+      "grad_norm": 0.24956457316875458,
+      "learning_rate": 0.00029068350079323107,
+      "loss": 0.2026,
+      "step": 5700
+    },
+    {
+      "epoch": 2.2548749261374827,
+      "grad_norm": 0.4090624749660492,
+      "learning_rate": 0.0002906339238498149,
+      "loss": 0.1576,
+      "step": 5725
+    },
+    {
+      "epoch": 2.264723261768761,
+      "grad_norm": 0.2871131896972656,
+      "learning_rate": 0.0002905843469063987,
+      "loss": 0.2075,
+      "step": 5750
+    },
+    {
+      "epoch": 2.2745715974000396,
+      "grad_norm": 0.4429128170013428,
+      "learning_rate": 0.00029053476996298254,
+      "loss": 0.1918,
+      "step": 5775
+    },
+    {
+      "epoch": 2.2844199330313177,
+      "grad_norm": 0.515388548374176,
+      "learning_rate": 0.00029048519301956636,
+      "loss": 0.2296,
+      "step": 5800
+    },
+    {
+      "epoch": 2.294268268662596,
+      "grad_norm": 0.3795805275440216,
+      "learning_rate": 0.00029043561607615013,
+      "loss": 0.1521,
+      "step": 5825
+    },
+    {
+      "epoch": 2.304116604293874,
+      "grad_norm": 0.2490343451499939,
+      "learning_rate": 0.000290386039132734,
+      "loss": 0.1812,
+      "step": 5850
+    },
+    {
+      "epoch": 2.3139649399251527,
+      "grad_norm": 0.4595893621444702,
+      "learning_rate": 0.0002903364621893178,
+      "loss": 0.1662,
+      "step": 5875
+    },
+    {
+      "epoch": 2.323813275556431,
+      "grad_norm": 0.4148694574832916,
+      "learning_rate": 0.0002902868852459016,
+      "loss": 0.207,
+      "step": 5900
+    },
+    {
+      "epoch": 2.333661611187709,
+      "grad_norm": 0.5799115300178528,
+      "learning_rate": 0.0002902373083024854,
+      "loss": 0.1774,
+      "step": 5925
+    },
+    {
+      "epoch": 2.3435099468189877,
+      "grad_norm": 0.2783784568309784,
+      "learning_rate": 0.00029018773135906924,
+      "loss": 0.1981,
+      "step": 5950
+    },
+    {
+      "epoch": 2.353358282450266,
+      "grad_norm": 0.5307630300521851,
+      "learning_rate": 0.00029013815441565307,
+      "loss": 0.165,
+      "step": 5975
+    },
+    {
+      "epoch": 2.363206618081544,
+      "grad_norm": 0.29262271523475647,
+      "learning_rate": 0.0002900885774722369,
+      "loss": 0.2018,
+      "step": 6000
+    },
+    {
+      "epoch": 2.363206618081544,
+      "eval_cer": 0.15306044740599714,
+      "eval_loss": 0.4573569595813751,
+      "eval_runtime": 161.9027,
+      "eval_samples_per_second": 7.566,
+      "eval_steps_per_second": 0.951,
+      "eval_wer": 0.550710046629928,
+      "step": 6000
+    },
+    {
+      "epoch": 2.3730549537128227,
+      "grad_norm": 0.6890036463737488,
+      "learning_rate": 0.0002900390005288207,
+      "loss": 0.1505,
+      "step": 6025
+    },
+    {
+      "epoch": 2.382903289344101,
+      "grad_norm": 0.25754716992378235,
+      "learning_rate": 0.00028998942358540454,
+      "loss": 0.2199,
+      "step": 6050
+    },
+    {
+      "epoch": 2.392751624975379,
+      "grad_norm": 0.40421202778816223,
+      "learning_rate": 0.0002899398466419883,
+      "loss": 0.1581,
+      "step": 6075
+    },
+    {
+      "epoch": 2.4025999606066577,
+      "grad_norm": 0.3112453520298004,
+      "learning_rate": 0.0002898902696985722,
+      "loss": 0.2001,
+      "step": 6100
+    },
+    {
+      "epoch": 2.412448296237936,
+      "grad_norm": 0.4719525873661041,
+      "learning_rate": 0.00028984069275515595,
+      "loss": 0.1381,
+      "step": 6125
+    },
+    {
+      "epoch": 2.422296631869214,
+      "grad_norm": 0.25783413648605347,
+      "learning_rate": 0.00028979111581173983,
+      "loss": 0.2042,
+      "step": 6150
+    },
+    {
+      "epoch": 2.4321449675004922,
+      "grad_norm": 0.46275967359542847,
+      "learning_rate": 0.0002897415388683236,
+      "loss": 0.1668,
+      "step": 6175
+    },
+    {
+      "epoch": 2.441993303131771,
+      "grad_norm": 0.41631945967674255,
+      "learning_rate": 0.0002896919619249074,
+      "loss": 0.2147,
+      "step": 6200
+    },
+    {
+      "epoch": 2.451841638763049,
+      "grad_norm": 0.44267284870147705,
+      "learning_rate": 0.00028964238498149124,
+      "loss": 0.1643,
+      "step": 6225
+    },
+    {
+      "epoch": 2.4616899743943272,
+      "grad_norm": 0.29726967215538025,
+      "learning_rate": 0.00028959280803807506,
+      "loss": 0.1831,
+      "step": 6250
+    },
+    {
+      "epoch": 2.471538310025606,
+      "grad_norm": 0.597053050994873,
+      "learning_rate": 0.0002895432310946589,
+      "loss": 0.1464,
+      "step": 6275
+    },
+    {
+      "epoch": 2.481386645656884,
+      "grad_norm": 0.21625810861587524,
+      "learning_rate": 0.0002894936541512427,
+      "loss": 0.2042,
+      "step": 6300
+    },
+    {
+      "epoch": 2.4912349812881622,
+      "grad_norm": 0.7981178164482117,
+      "learning_rate": 0.00028944407720782653,
+      "loss": 0.1417,
+      "step": 6325
+    },
+    {
+      "epoch": 2.5010833169194404,
+      "grad_norm": 0.26910632848739624,
+      "learning_rate": 0.00028939450026441036,
+      "loss": 0.223,
+      "step": 6350
+    },
+    {
+      "epoch": 2.510931652550719,
+      "grad_norm": 0.6753020882606506,
+      "learning_rate": 0.0002893449233209942,
+      "loss": 0.1597,
+      "step": 6375
+    },
+    {
+      "epoch": 2.5207799881819972,
+      "grad_norm": 0.2274799793958664,
+      "learning_rate": 0.000289295346377578,
+      "loss": 0.2102,
+      "step": 6400
+    },
+    {
+      "epoch": 2.5306283238132754,
+      "grad_norm": 0.2969699501991272,
+      "learning_rate": 0.00028924576943416177,
+      "loss": 0.17,
+      "step": 6425
+    },
+    {
+      "epoch": 2.540476659444554,
+      "grad_norm": 0.48988422751426697,
+      "learning_rate": 0.0002891961924907456,
+      "loss": 0.1838,
+      "step": 6450
+    },
+    {
+      "epoch": 2.550324995075832,
+      "grad_norm": 0.568366527557373,
+      "learning_rate": 0.0002891466155473294,
+      "loss": 0.1455,
+      "step": 6475
+    },
+    {
+      "epoch": 2.5601733307071104,
+      "grad_norm": 0.35780784487724304,
+      "learning_rate": 0.00028909703860391324,
+      "loss": 0.2421,
+      "step": 6500
+    },
+    {
+      "epoch": 2.5601733307071104,
+      "eval_cer": 0.15275583055687766,
+      "eval_loss": 0.4416634440422058,
+      "eval_runtime": 161.0854,
+      "eval_samples_per_second": 7.605,
+      "eval_steps_per_second": 0.956,
+      "eval_wer": 0.5499682068673166,
+      "step": 6500
+    },
+    {
+      "epoch": 2.5601733307071104,
+      "step": 6500,
+      "total_flos": 2.4200639627895153e+19,
+      "train_loss": 0.5404087718450106,
+      "train_runtime": 33172.3648,
+      "train_samples_per_second": 73.451,
+      "train_steps_per_second": 4.591
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 152280,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 60,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.4200639627895153e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}