End of training

Browse files

Files changed (6) hide show

README.md +15 -2
all_results.json +15 -0
eval_results.json +9 -0
runs/Oct06_08-50-04_d2d35f196850/events.out.tfevents.1728246234.d2d35f196850.376.1 +3 -0
train_results.json +9 -0
trainer_state.json +1487 -0

README.md CHANGED Viewed

@@ -3,11 +3,24 @@ license: apache-2.0
 base_model: openai/whisper-large-v3
 tags:
 - generated_from_trainer
 metrics:
 - wer
 model-index:
 - name: whisper-large-v3-ec
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +28,7 @@ should probably proofread and complete it, then remove this comment. -->
 # whisper-large-v3-ec
-This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4733
 - Wer: 0.2137

 base_model: openai/whisper-large-v3
 tags:
 - generated_from_trainer
+datasets:
+- wanasash/enwaucymraeg
 metrics:
 - wer
 model-index:
 - name: whisper-large-v3-ec
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: wanasash/enwaucymraeg default
+      type: wanasash/enwaucymraeg
+      args: default
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.21372622155911974
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # whisper-large-v3-ec
+This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the wanasash/enwaucymraeg default dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4733
 - Wer: 0.2137

all_results.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "epoch": 68.02721088435374,
+    "eval_loss": 0.47325974702835083,
+    "eval_runtime": 101.545,
+    "eval_samples": 260,
+    "eval_samples_per_second": 2.56,
+    "eval_steps_per_second": 0.167,
+    "eval_wer": 0.21372622155911974,
+    "total_flos": 5.4059634054660096e+20,
+    "train_loss": 0.028010966634354553,
+    "train_runtime": 44897.3386,
+    "train_samples": 2339,
+    "train_samples_per_second": 3.564,
+    "train_steps_per_second": 0.111
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 68.02721088435374,
+    "eval_loss": 0.47325974702835083,
+    "eval_runtime": 101.545,
+    "eval_samples": 260,
+    "eval_samples_per_second": 2.56,
+    "eval_steps_per_second": 0.167,
+    "eval_wer": 0.21372622155911974
+}

runs/Oct06_08-50-04_d2d35f196850/events.out.tfevents.1728246234.d2d35f196850.376.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e7e35ea8d826498e1e596e69edcbdab97157816b347d90bbc71d605f0c8df5a
+size 406

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 68.02721088435374,
+    "total_flos": 5.4059634054660096e+20,
+    "train_loss": 0.028010966634354553,
+    "train_runtime": 44897.3386,
+    "train_samples": 2339,
+    "train_samples_per_second": 3.564,
+    "train_steps_per_second": 0.111
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1487 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 68.02721088435374,
+  "eval_steps": 1000,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3401360544217687,
+      "grad_norm": 7.277314186096191,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 1.2508,
+      "step": 25
+    },
+    {
+      "epoch": 0.6802721088435374,
+      "grad_norm": 4.874781131744385,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.8343,
+      "step": 50
+    },
+    {
+      "epoch": 1.0204081632653061,
+      "grad_norm": 3.6367509365081787,
+      "learning_rate": 1.5e-06,
+      "loss": 0.5016,
+      "step": 75
+    },
+    {
+      "epoch": 1.3605442176870748,
+      "grad_norm": 3.666282892227173,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.3941,
+      "step": 100
+    },
+    {
+      "epoch": 1.7006802721088436,
+      "grad_norm": 3.6144936084747314,
+      "learning_rate": 2.5e-06,
+      "loss": 0.3451,
+      "step": 125
+    },
+    {
+      "epoch": 2.0408163265306123,
+      "grad_norm": 2.4187328815460205,
+      "learning_rate": 3e-06,
+      "loss": 0.3123,
+      "step": 150
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 3.255674362182617,
+      "learning_rate": 3.5e-06,
+      "loss": 0.2263,
+      "step": 175
+    },
+    {
+      "epoch": 2.7210884353741496,
+      "grad_norm": 3.5080747604370117,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.2111,
+      "step": 200
+    },
+    {
+      "epoch": 3.061224489795918,
+      "grad_norm": 2.3079421520233154,
+      "learning_rate": 4.5e-06,
+      "loss": 0.201,
+      "step": 225
+    },
+    {
+      "epoch": 3.4013605442176873,
+      "grad_norm": 2.2684943675994873,
+      "learning_rate": 5e-06,
+      "loss": 0.1196,
+      "step": 250
+    },
+    {
+      "epoch": 3.741496598639456,
+      "grad_norm": 2.2747371196746826,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 0.1258,
+      "step": 275
+    },
+    {
+      "epoch": 4.081632653061225,
+      "grad_norm": 2.270559787750244,
+      "learning_rate": 6e-06,
+      "loss": 0.1133,
+      "step": 300
+    },
+    {
+      "epoch": 4.421768707482993,
+      "grad_norm": 2.20074200630188,
+      "learning_rate": 6.5000000000000004e-06,
+      "loss": 0.0682,
+      "step": 325
+    },
+    {
+      "epoch": 4.761904761904762,
+      "grad_norm": 2.804894208908081,
+      "learning_rate": 7e-06,
+      "loss": 0.0731,
+      "step": 350
+    },
+    {
+      "epoch": 5.1020408163265305,
+      "grad_norm": 2.6068227291107178,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.0672,
+      "step": 375
+    },
+    {
+      "epoch": 5.442176870748299,
+      "grad_norm": 3.2111849784851074,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.0478,
+      "step": 400
+    },
+    {
+      "epoch": 5.782312925170068,
+      "grad_norm": 1.5456165075302124,
+      "learning_rate": 8.5e-06,
+      "loss": 0.0505,
+      "step": 425
+    },
+    {
+      "epoch": 6.122448979591836,
+      "grad_norm": 1.4710639715194702,
+      "learning_rate": 9e-06,
+      "loss": 0.0473,
+      "step": 450
+    },
+    {
+      "epoch": 6.462585034013605,
+      "grad_norm": 2.914450168609619,
+      "learning_rate": 9.5e-06,
+      "loss": 0.0368,
+      "step": 475
+    },
+    {
+      "epoch": 6.802721088435375,
+      "grad_norm": 2.29419207572937,
+      "learning_rate": 1e-05,
+      "loss": 0.0441,
+      "step": 500
+    },
+    {
+      "epoch": 7.142857142857143,
+      "grad_norm": 0.8864838480949402,
+      "learning_rate": 9.944444444444445e-06,
+      "loss": 0.0412,
+      "step": 525
+    },
+    {
+      "epoch": 7.482993197278912,
+      "grad_norm": 1.3316959142684937,
+      "learning_rate": 9.88888888888889e-06,
+      "loss": 0.0347,
+      "step": 550
+    },
+    {
+      "epoch": 7.8231292517006805,
+      "grad_norm": 2.046053647994995,
+      "learning_rate": 9.833333333333333e-06,
+      "loss": 0.0299,
+      "step": 575
+    },
+    {
+      "epoch": 8.16326530612245,
+      "grad_norm": 1.579432725906372,
+      "learning_rate": 9.777777777777779e-06,
+      "loss": 0.0268,
+      "step": 600
+    },
+    {
+      "epoch": 8.503401360544217,
+      "grad_norm": 1.7325966358184814,
+      "learning_rate": 9.722222222222223e-06,
+      "loss": 0.019,
+      "step": 625
+    },
+    {
+      "epoch": 8.843537414965986,
+      "grad_norm": 1.6740517616271973,
+      "learning_rate": 9.666666666666667e-06,
+      "loss": 0.0262,
+      "step": 650
+    },
+    {
+      "epoch": 9.183673469387756,
+      "grad_norm": 0.65809166431427,
+      "learning_rate": 9.611111111111112e-06,
+      "loss": 0.0183,
+      "step": 675
+    },
+    {
+      "epoch": 9.523809523809524,
+      "grad_norm": 2.288142442703247,
+      "learning_rate": 9.555555555555556e-06,
+      "loss": 0.0177,
+      "step": 700
+    },
+    {
+      "epoch": 9.863945578231293,
+      "grad_norm": 1.3612937927246094,
+      "learning_rate": 9.5e-06,
+      "loss": 0.0179,
+      "step": 725
+    },
+    {
+      "epoch": 10.204081632653061,
+      "grad_norm": 1.2634042501449585,
+      "learning_rate": 9.444444444444445e-06,
+      "loss": 0.0142,
+      "step": 750
+    },
+    {
+      "epoch": 10.54421768707483,
+      "grad_norm": 0.8320721387863159,
+      "learning_rate": 9.38888888888889e-06,
+      "loss": 0.0131,
+      "step": 775
+    },
+    {
+      "epoch": 10.884353741496598,
+      "grad_norm": 1.1112641096115112,
+      "learning_rate": 9.333333333333334e-06,
+      "loss": 0.0148,
+      "step": 800
+    },
+    {
+      "epoch": 11.224489795918368,
+      "grad_norm": 0.4855366349220276,
+      "learning_rate": 9.277777777777778e-06,
+      "loss": 0.0124,
+      "step": 825
+    },
+    {
+      "epoch": 11.564625850340136,
+      "grad_norm": 1.5221891403198242,
+      "learning_rate": 9.222222222222224e-06,
+      "loss": 0.0116,
+      "step": 850
+    },
+    {
+      "epoch": 11.904761904761905,
+      "grad_norm": 1.4894431829452515,
+      "learning_rate": 9.166666666666666e-06,
+      "loss": 0.0144,
+      "step": 875
+    },
+    {
+      "epoch": 12.244897959183673,
+      "grad_norm": 1.2897030115127563,
+      "learning_rate": 9.111111111111112e-06,
+      "loss": 0.0099,
+      "step": 900
+    },
+    {
+      "epoch": 12.585034013605442,
+      "grad_norm": 0.5321520566940308,
+      "learning_rate": 9.055555555555556e-06,
+      "loss": 0.0068,
+      "step": 925
+    },
+    {
+      "epoch": 12.92517006802721,
+      "grad_norm": 1.2740304470062256,
+      "learning_rate": 9e-06,
+      "loss": 0.01,
+      "step": 950
+    },
+    {
+      "epoch": 13.26530612244898,
+      "grad_norm": 0.6653080582618713,
+      "learning_rate": 8.944444444444446e-06,
+      "loss": 0.0072,
+      "step": 975
+    },
+    {
+      "epoch": 13.60544217687075,
+      "grad_norm": 1.0347912311553955,
+      "learning_rate": 8.888888888888888e-06,
+      "loss": 0.0091,
+      "step": 1000
+    },
+    {
+      "epoch": 13.60544217687075,
+      "eval_loss": 0.4027358293533325,
+      "eval_runtime": 102.3269,
+      "eval_samples_per_second": 2.541,
+      "eval_steps_per_second": 0.166,
+      "eval_wer": 0.21894815367400225,
+      "step": 1000
+    },
+    {
+      "epoch": 13.945578231292517,
+      "grad_norm": 0.6724150776863098,
+      "learning_rate": 8.833333333333334e-06,
+      "loss": 0.0119,
+      "step": 1025
+    },
+    {
+      "epoch": 14.285714285714286,
+      "grad_norm": 0.44545045495033264,
+      "learning_rate": 8.777777777777778e-06,
+      "loss": 0.0069,
+      "step": 1050
+    },
+    {
+      "epoch": 14.625850340136054,
+      "grad_norm": 1.5290805101394653,
+      "learning_rate": 8.722222222222224e-06,
+      "loss": 0.0078,
+      "step": 1075
+    },
+    {
+      "epoch": 14.965986394557824,
+      "grad_norm": 0.5309610962867737,
+      "learning_rate": 8.666666666666668e-06,
+      "loss": 0.0053,
+      "step": 1100
+    },
+    {
+      "epoch": 15.306122448979592,
+      "grad_norm": 0.1730959564447403,
+      "learning_rate": 8.611111111111112e-06,
+      "loss": 0.0031,
+      "step": 1125
+    },
+    {
+      "epoch": 15.646258503401361,
+      "grad_norm": 0.06295394897460938,
+      "learning_rate": 8.555555555555556e-06,
+      "loss": 0.0044,
+      "step": 1150
+    },
+    {
+      "epoch": 15.986394557823129,
+      "grad_norm": 0.4541076421737671,
+      "learning_rate": 8.5e-06,
+      "loss": 0.0036,
+      "step": 1175
+    },
+    {
+      "epoch": 16.3265306122449,
+      "grad_norm": 0.24278602004051208,
+      "learning_rate": 8.444444444444446e-06,
+      "loss": 0.0045,
+      "step": 1200
+    },
+    {
+      "epoch": 16.666666666666668,
+      "grad_norm": 0.3119202256202698,
+      "learning_rate": 8.38888888888889e-06,
+      "loss": 0.0026,
+      "step": 1225
+    },
+    {
+      "epoch": 17.006802721088434,
+      "grad_norm": 0.7003461718559265,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.0031,
+      "step": 1250
+    },
+    {
+      "epoch": 17.346938775510203,
+      "grad_norm": 0.3013887107372284,
+      "learning_rate": 8.277777777777778e-06,
+      "loss": 0.0024,
+      "step": 1275
+    },
+    {
+      "epoch": 17.687074829931973,
+      "grad_norm": 0.054453521966934204,
+      "learning_rate": 8.222222222222222e-06,
+      "loss": 0.0018,
+      "step": 1300
+    },
+    {
+      "epoch": 18.027210884353742,
+      "grad_norm": 0.46942847967147827,
+      "learning_rate": 8.166666666666668e-06,
+      "loss": 0.0016,
+      "step": 1325
+    },
+    {
+      "epoch": 18.367346938775512,
+      "grad_norm": 0.051955267786979675,
+      "learning_rate": 8.111111111111112e-06,
+      "loss": 0.0014,
+      "step": 1350
+    },
+    {
+      "epoch": 18.707482993197278,
+      "grad_norm": 0.022960973903536797,
+      "learning_rate": 8.055555555555557e-06,
+      "loss": 0.0007,
+      "step": 1375
+    },
+    {
+      "epoch": 19.047619047619047,
+      "grad_norm": 0.060347653925418854,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.0023,
+      "step": 1400
+    },
+    {
+      "epoch": 19.387755102040817,
+      "grad_norm": 0.045802563428878784,
+      "learning_rate": 7.944444444444445e-06,
+      "loss": 0.0009,
+      "step": 1425
+    },
+    {
+      "epoch": 19.727891156462587,
+      "grad_norm": 0.03706446662545204,
+      "learning_rate": 7.88888888888889e-06,
+      "loss": 0.0009,
+      "step": 1450
+    },
+    {
+      "epoch": 20.068027210884352,
+      "grad_norm": 0.014720437116920948,
+      "learning_rate": 7.833333333333333e-06,
+      "loss": 0.0008,
+      "step": 1475
+    },
+    {
+      "epoch": 20.408163265306122,
+      "grad_norm": 0.01641876809298992,
+      "learning_rate": 7.77777777777778e-06,
+      "loss": 0.0005,
+      "step": 1500
+    },
+    {
+      "epoch": 20.74829931972789,
+      "grad_norm": 0.051012102514505386,
+      "learning_rate": 7.722222222222223e-06,
+      "loss": 0.0006,
+      "step": 1525
+    },
+    {
+      "epoch": 21.08843537414966,
+      "grad_norm": 0.07526645809412003,
+      "learning_rate": 7.666666666666667e-06,
+      "loss": 0.0006,
+      "step": 1550
+    },
+    {
+      "epoch": 21.428571428571427,
+      "grad_norm": 0.014325820840895176,
+      "learning_rate": 7.611111111111111e-06,
+      "loss": 0.0013,
+      "step": 1575
+    },
+    {
+      "epoch": 21.768707482993197,
+      "grad_norm": 0.013493416830897331,
+      "learning_rate": 7.555555555555556e-06,
+      "loss": 0.0008,
+      "step": 1600
+    },
+    {
+      "epoch": 22.108843537414966,
+      "grad_norm": 0.09651411324739456,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.0007,
+      "step": 1625
+    },
+    {
+      "epoch": 22.448979591836736,
+      "grad_norm": 0.00849719438701868,
+      "learning_rate": 7.444444444444445e-06,
+      "loss": 0.0004,
+      "step": 1650
+    },
+    {
+      "epoch": 22.7891156462585,
+      "grad_norm": 0.12997107207775116,
+      "learning_rate": 7.38888888888889e-06,
+      "loss": 0.0004,
+      "step": 1675
+    },
+    {
+      "epoch": 23.12925170068027,
+      "grad_norm": 0.017772994935512543,
+      "learning_rate": 7.333333333333333e-06,
+      "loss": 0.0004,
+      "step": 1700
+    },
+    {
+      "epoch": 23.46938775510204,
+      "grad_norm": 0.015189074911177158,
+      "learning_rate": 7.277777777777778e-06,
+      "loss": 0.0005,
+      "step": 1725
+    },
+    {
+      "epoch": 23.80952380952381,
+      "grad_norm": 0.015632469207048416,
+      "learning_rate": 7.222222222222223e-06,
+      "loss": 0.0005,
+      "step": 1750
+    },
+    {
+      "epoch": 24.14965986394558,
+      "grad_norm": 0.01429369393736124,
+      "learning_rate": 7.166666666666667e-06,
+      "loss": 0.0008,
+      "step": 1775
+    },
+    {
+      "epoch": 24.489795918367346,
+      "grad_norm": 0.009108461439609528,
+      "learning_rate": 7.111111111111112e-06,
+      "loss": 0.0004,
+      "step": 1800
+    },
+    {
+      "epoch": 24.829931972789115,
+      "grad_norm": 0.00893578864634037,
+      "learning_rate": 7.055555555555557e-06,
+      "loss": 0.0008,
+      "step": 1825
+    },
+    {
+      "epoch": 25.170068027210885,
+      "grad_norm": 0.010999013669788837,
+      "learning_rate": 7e-06,
+      "loss": 0.0017,
+      "step": 1850
+    },
+    {
+      "epoch": 25.510204081632654,
+      "grad_norm": 0.039021287113428116,
+      "learning_rate": 6.944444444444445e-06,
+      "loss": 0.001,
+      "step": 1875
+    },
+    {
+      "epoch": 25.85034013605442,
+      "grad_norm": 0.8390358686447144,
+      "learning_rate": 6.88888888888889e-06,
+      "loss": 0.0009,
+      "step": 1900
+    },
+    {
+      "epoch": 26.19047619047619,
+      "grad_norm": 0.02965502440929413,
+      "learning_rate": 6.833333333333334e-06,
+      "loss": 0.0013,
+      "step": 1925
+    },
+    {
+      "epoch": 26.53061224489796,
+      "grad_norm": 0.21533174812793732,
+      "learning_rate": 6.777777777777779e-06,
+      "loss": 0.0029,
+      "step": 1950
+    },
+    {
+      "epoch": 26.87074829931973,
+      "grad_norm": 0.06805889308452606,
+      "learning_rate": 6.7222222222222235e-06,
+      "loss": 0.0023,
+      "step": 1975
+    },
+    {
+      "epoch": 27.2108843537415,
+      "grad_norm": 0.13776488602161407,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 0.0026,
+      "step": 2000
+    },
+    {
+      "epoch": 27.2108843537415,
+      "eval_loss": 0.4282212555408478,
+      "eval_runtime": 102.1629,
+      "eval_samples_per_second": 2.545,
+      "eval_steps_per_second": 0.166,
+      "eval_wer": 0.22603506154419992,
+      "step": 2000
+    },
+    {
+      "epoch": 27.551020408163264,
+      "grad_norm": 0.7586637735366821,
+      "learning_rate": 6.6111111111111115e-06,
+      "loss": 0.0045,
+      "step": 2025
+    },
+    {
+      "epoch": 27.891156462585034,
+      "grad_norm": 0.19451989233493805,
+      "learning_rate": 6.555555555555556e-06,
+      "loss": 0.0051,
+      "step": 2050
+    },
+    {
+      "epoch": 28.231292517006803,
+      "grad_norm": 0.1951601356267929,
+      "learning_rate": 6.5000000000000004e-06,
+      "loss": 0.0037,
+      "step": 2075
+    },
+    {
+      "epoch": 28.571428571428573,
+      "grad_norm": 0.3530046045780182,
+      "learning_rate": 6.444444444444445e-06,
+      "loss": 0.0037,
+      "step": 2100
+    },
+    {
+      "epoch": 28.91156462585034,
+      "grad_norm": 0.26006898283958435,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 0.0045,
+      "step": 2125
+    },
+    {
+      "epoch": 29.25170068027211,
+      "grad_norm": 1.6727913618087769,
+      "learning_rate": 6.333333333333333e-06,
+      "loss": 0.0034,
+      "step": 2150
+    },
+    {
+      "epoch": 29.591836734693878,
+      "grad_norm": 0.23586586117744446,
+      "learning_rate": 6.277777777777778e-06,
+      "loss": 0.0033,
+      "step": 2175
+    },
+    {
+      "epoch": 29.931972789115648,
+      "grad_norm": 2.080326557159424,
+      "learning_rate": 6.222222222222223e-06,
+      "loss": 0.0041,
+      "step": 2200
+    },
+    {
+      "epoch": 30.272108843537413,
+      "grad_norm": 0.43551284074783325,
+      "learning_rate": 6.166666666666667e-06,
+      "loss": 0.0056,
+      "step": 2225
+    },
+    {
+      "epoch": 30.612244897959183,
+      "grad_norm": 0.40780606865882874,
+      "learning_rate": 6.111111111111112e-06,
+      "loss": 0.0034,
+      "step": 2250
+    },
+    {
+      "epoch": 30.952380952380953,
+      "grad_norm": 0.2716953754425049,
+      "learning_rate": 6.055555555555555e-06,
+      "loss": 0.0059,
+      "step": 2275
+    },
+    {
+      "epoch": 31.292517006802722,
+      "grad_norm": 0.7849836945533752,
+      "learning_rate": 6e-06,
+      "loss": 0.004,
+      "step": 2300
+    },
+    {
+      "epoch": 31.632653061224488,
+      "grad_norm": 0.0561537891626358,
+      "learning_rate": 5.944444444444445e-06,
+      "loss": 0.0031,
+      "step": 2325
+    },
+    {
+      "epoch": 31.972789115646258,
+      "grad_norm": 0.2189192771911621,
+      "learning_rate": 5.88888888888889e-06,
+      "loss": 0.0025,
+      "step": 2350
+    },
+    {
+      "epoch": 32.31292517006803,
+      "grad_norm": 0.7395544052124023,
+      "learning_rate": 5.833333333333334e-06,
+      "loss": 0.0035,
+      "step": 2375
+    },
+    {
+      "epoch": 32.6530612244898,
+      "grad_norm": 0.8322564959526062,
+      "learning_rate": 5.777777777777778e-06,
+      "loss": 0.0029,
+      "step": 2400
+    },
+    {
+      "epoch": 32.993197278911566,
+      "grad_norm": 0.1809442937374115,
+      "learning_rate": 5.722222222222222e-06,
+      "loss": 0.004,
+      "step": 2425
+    },
+    {
+      "epoch": 33.333333333333336,
+      "grad_norm": 0.09690059721469879,
+      "learning_rate": 5.666666666666667e-06,
+      "loss": 0.0019,
+      "step": 2450
+    },
+    {
+      "epoch": 33.673469387755105,
+      "grad_norm": 0.29557883739471436,
+      "learning_rate": 5.611111111111112e-06,
+      "loss": 0.0023,
+      "step": 2475
+    },
+    {
+      "epoch": 34.01360544217687,
+      "grad_norm": 0.026952777057886124,
+      "learning_rate": 5.555555555555557e-06,
+      "loss": 0.001,
+      "step": 2500
+    },
+    {
+      "epoch": 34.35374149659864,
+      "grad_norm": 0.04151106998324394,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 0.0008,
+      "step": 2525
+    },
+    {
+      "epoch": 34.69387755102041,
+      "grad_norm": 0.01914023421704769,
+      "learning_rate": 5.444444444444445e-06,
+      "loss": 0.0012,
+      "step": 2550
+    },
+    {
+      "epoch": 35.034013605442176,
+      "grad_norm": 0.15637150406837463,
+      "learning_rate": 5.388888888888889e-06,
+      "loss": 0.0011,
+      "step": 2575
+    },
+    {
+      "epoch": 35.374149659863946,
+      "grad_norm": 0.18300969898700714,
+      "learning_rate": 5.333333333333334e-06,
+      "loss": 0.0013,
+      "step": 2600
+    },
+    {
+      "epoch": 35.714285714285715,
+      "grad_norm": 0.019685134291648865,
+      "learning_rate": 5.2777777777777785e-06,
+      "loss": 0.0009,
+      "step": 2625
+    },
+    {
+      "epoch": 36.054421768707485,
+      "grad_norm": 0.012847774662077427,
+      "learning_rate": 5.2222222222222226e-06,
+      "loss": 0.0004,
+      "step": 2650
+    },
+    {
+      "epoch": 36.394557823129254,
+      "grad_norm": 0.011110267601907253,
+      "learning_rate": 5.1666666666666675e-06,
+      "loss": 0.0003,
+      "step": 2675
+    },
+    {
+      "epoch": 36.734693877551024,
+      "grad_norm": 0.010116774588823318,
+      "learning_rate": 5.1111111111111115e-06,
+      "loss": 0.0003,
+      "step": 2700
+    },
+    {
+      "epoch": 37.074829931972786,
+      "grad_norm": 0.00764122698456049,
+      "learning_rate": 5.0555555555555555e-06,
+      "loss": 0.0003,
+      "step": 2725
+    },
+    {
+      "epoch": 37.414965986394556,
+      "grad_norm": 0.02383231371641159,
+      "learning_rate": 5e-06,
+      "loss": 0.0002,
+      "step": 2750
+    },
+    {
+      "epoch": 37.755102040816325,
+      "grad_norm": 0.006380359176546335,
+      "learning_rate": 4.944444444444445e-06,
+      "loss": 0.0002,
+      "step": 2775
+    },
+    {
+      "epoch": 38.095238095238095,
+      "grad_norm": 0.0054608238860964775,
+      "learning_rate": 4.888888888888889e-06,
+      "loss": 0.0002,
+      "step": 2800
+    },
+    {
+      "epoch": 38.435374149659864,
+      "grad_norm": 0.005097352433949709,
+      "learning_rate": 4.833333333333333e-06,
+      "loss": 0.0002,
+      "step": 2825
+    },
+    {
+      "epoch": 38.775510204081634,
+      "grad_norm": 0.005154197569936514,
+      "learning_rate": 4.777777777777778e-06,
+      "loss": 0.0002,
+      "step": 2850
+    },
+    {
+      "epoch": 39.1156462585034,
+      "grad_norm": 0.005586385726928711,
+      "learning_rate": 4.722222222222222e-06,
+      "loss": 0.0002,
+      "step": 2875
+    },
+    {
+      "epoch": 39.45578231292517,
+      "grad_norm": 0.005172679666429758,
+      "learning_rate": 4.666666666666667e-06,
+      "loss": 0.0002,
+      "step": 2900
+    },
+    {
+      "epoch": 39.795918367346935,
+      "grad_norm": 0.004589286167174578,
+      "learning_rate": 4.611111111111112e-06,
+      "loss": 0.0002,
+      "step": 2925
+    },
+    {
+      "epoch": 40.136054421768705,
+      "grad_norm": 0.005452868994325399,
+      "learning_rate": 4.555555555555556e-06,
+      "loss": 0.0002,
+      "step": 2950
+    },
+    {
+      "epoch": 40.476190476190474,
+      "grad_norm": 0.004887877497822046,
+      "learning_rate": 4.5e-06,
+      "loss": 0.0002,
+      "step": 2975
+    },
+    {
+      "epoch": 40.816326530612244,
+      "grad_norm": 0.00491141015663743,
+      "learning_rate": 4.444444444444444e-06,
+      "loss": 0.0002,
+      "step": 3000
+    },
+    {
+      "epoch": 40.816326530612244,
+      "eval_loss": 0.4444006681442261,
+      "eval_runtime": 102.1694,
+      "eval_samples_per_second": 2.545,
+      "eval_steps_per_second": 0.166,
+      "eval_wer": 0.21782916822081314,
+      "step": 3000
+    },
+    {
+      "epoch": 41.156462585034014,
+      "grad_norm": 0.005289255175739527,
+      "learning_rate": 4.388888888888889e-06,
+      "loss": 0.0002,
+      "step": 3025
+    },
+    {
+      "epoch": 41.49659863945578,
+      "grad_norm": 0.004311501048505306,
+      "learning_rate": 4.333333333333334e-06,
+      "loss": 0.0001,
+      "step": 3050
+    },
+    {
+      "epoch": 41.83673469387755,
+      "grad_norm": 0.004461329896003008,
+      "learning_rate": 4.277777777777778e-06,
+      "loss": 0.0002,
+      "step": 3075
+    },
+    {
+      "epoch": 42.17687074829932,
+      "grad_norm": 0.0043508694507181644,
+      "learning_rate": 4.222222222222223e-06,
+      "loss": 0.0001,
+      "step": 3100
+    },
+    {
+      "epoch": 42.51700680272109,
+      "grad_norm": 0.004281959030777216,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 0.0001,
+      "step": 3125
+    },
+    {
+      "epoch": 42.857142857142854,
+      "grad_norm": 0.004064427223056555,
+      "learning_rate": 4.111111111111111e-06,
+      "loss": 0.0001,
+      "step": 3150
+    },
+    {
+      "epoch": 43.197278911564624,
+      "grad_norm": 0.00377645343542099,
+      "learning_rate": 4.055555555555556e-06,
+      "loss": 0.0001,
+      "step": 3175
+    },
+    {
+      "epoch": 43.53741496598639,
+      "grad_norm": 0.004163255449384451,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0001,
+      "step": 3200
+    },
+    {
+      "epoch": 43.87755102040816,
+      "grad_norm": 0.004156498704105616,
+      "learning_rate": 3.944444444444445e-06,
+      "loss": 0.0001,
+      "step": 3225
+    },
+    {
+      "epoch": 44.21768707482993,
+      "grad_norm": 0.002806807868182659,
+      "learning_rate": 3.88888888888889e-06,
+      "loss": 0.0001,
+      "step": 3250
+    },
+    {
+      "epoch": 44.5578231292517,
+      "grad_norm": 0.0035042332019656897,
+      "learning_rate": 3.833333333333334e-06,
+      "loss": 0.0001,
+      "step": 3275
+    },
+    {
+      "epoch": 44.89795918367347,
+      "grad_norm": 0.003332278924062848,
+      "learning_rate": 3.777777777777778e-06,
+      "loss": 0.0001,
+      "step": 3300
+    },
+    {
+      "epoch": 45.23809523809524,
+      "grad_norm": 0.003195433644577861,
+      "learning_rate": 3.7222222222222225e-06,
+      "loss": 0.0001,
+      "step": 3325
+    },
+    {
+      "epoch": 45.578231292517,
+      "grad_norm": 0.0031893581617623568,
+      "learning_rate": 3.6666666666666666e-06,
+      "loss": 0.0001,
+      "step": 3350
+    },
+    {
+      "epoch": 45.91836734693877,
+      "grad_norm": 0.0031172255985438824,
+      "learning_rate": 3.6111111111111115e-06,
+      "loss": 0.0001,
+      "step": 3375
+    },
+    {
+      "epoch": 46.25850340136054,
+      "grad_norm": 0.0032927945721894503,
+      "learning_rate": 3.555555555555556e-06,
+      "loss": 0.0001,
+      "step": 3400
+    },
+    {
+      "epoch": 46.59863945578231,
+      "grad_norm": 0.003365216078236699,
+      "learning_rate": 3.5e-06,
+      "loss": 0.0001,
+      "step": 3425
+    },
+    {
+      "epoch": 46.93877551020408,
+      "grad_norm": 0.003197253216058016,
+      "learning_rate": 3.444444444444445e-06,
+      "loss": 0.0001,
+      "step": 3450
+    },
+    {
+      "epoch": 47.27891156462585,
+      "grad_norm": 0.0033013387583196163,
+      "learning_rate": 3.3888888888888893e-06,
+      "loss": 0.0001,
+      "step": 3475
+    },
+    {
+      "epoch": 47.61904761904762,
+      "grad_norm": 0.0030345292761921883,
+      "learning_rate": 3.3333333333333333e-06,
+      "loss": 0.0001,
+      "step": 3500
+    },
+    {
+      "epoch": 47.95918367346939,
+      "grad_norm": 0.002814779756590724,
+      "learning_rate": 3.277777777777778e-06,
+      "loss": 0.0001,
+      "step": 3525
+    },
+    {
+      "epoch": 48.29931972789116,
+      "grad_norm": 0.00269457814283669,
+      "learning_rate": 3.2222222222222227e-06,
+      "loss": 0.0001,
+      "step": 3550
+    },
+    {
+      "epoch": 48.63945578231292,
+      "grad_norm": 0.0031415882986038923,
+      "learning_rate": 3.1666666666666667e-06,
+      "loss": 0.0001,
+      "step": 3575
+    },
+    {
+      "epoch": 48.97959183673469,
+      "grad_norm": 0.0029843435622751713,
+      "learning_rate": 3.1111111111111116e-06,
+      "loss": 0.0001,
+      "step": 3600
+    },
+    {
+      "epoch": 49.31972789115646,
+      "grad_norm": 0.002468927064910531,
+      "learning_rate": 3.055555555555556e-06,
+      "loss": 0.0001,
+      "step": 3625
+    },
+    {
+      "epoch": 49.65986394557823,
+      "grad_norm": 0.002835317747667432,
+      "learning_rate": 3e-06,
+      "loss": 0.0001,
+      "step": 3650
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 0.003936604596674442,
+      "learning_rate": 2.944444444444445e-06,
+      "loss": 0.0001,
+      "step": 3675
+    },
+    {
+      "epoch": 50.34013605442177,
+      "grad_norm": 0.0028534727171063423,
+      "learning_rate": 2.888888888888889e-06,
+      "loss": 0.0001,
+      "step": 3700
+    },
+    {
+      "epoch": 50.68027210884354,
+      "grad_norm": 0.0029034256003797054,
+      "learning_rate": 2.8333333333333335e-06,
+      "loss": 0.0001,
+      "step": 3725
+    },
+    {
+      "epoch": 51.02040816326531,
+      "grad_norm": 0.0032608446199446917,
+      "learning_rate": 2.7777777777777783e-06,
+      "loss": 0.0001,
+      "step": 3750
+    },
+    {
+      "epoch": 51.36054421768708,
+      "grad_norm": 0.0026580116245895624,
+      "learning_rate": 2.7222222222222224e-06,
+      "loss": 0.0001,
+      "step": 3775
+    },
+    {
+      "epoch": 51.70068027210884,
+      "grad_norm": 0.0024348797742277384,
+      "learning_rate": 2.666666666666667e-06,
+      "loss": 0.0001,
+      "step": 3800
+    },
+    {
+      "epoch": 52.04081632653061,
+      "grad_norm": 0.002745755948126316,
+      "learning_rate": 2.6111111111111113e-06,
+      "loss": 0.0001,
+      "step": 3825
+    },
+    {
+      "epoch": 52.38095238095238,
+      "grad_norm": 0.0023754702415317297,
+      "learning_rate": 2.5555555555555557e-06,
+      "loss": 0.0001,
+      "step": 3850
+    },
+    {
+      "epoch": 52.72108843537415,
+      "grad_norm": 0.0026109826285392046,
+      "learning_rate": 2.5e-06,
+      "loss": 0.0001,
+      "step": 3875
+    },
+    {
+      "epoch": 53.06122448979592,
+      "grad_norm": 0.0025893962010741234,
+      "learning_rate": 2.4444444444444447e-06,
+      "loss": 0.0001,
+      "step": 3900
+    },
+    {
+      "epoch": 53.40136054421769,
+      "grad_norm": 0.0023246912751346827,
+      "learning_rate": 2.388888888888889e-06,
+      "loss": 0.0001,
+      "step": 3925
+    },
+    {
+      "epoch": 53.74149659863946,
+      "grad_norm": 0.002337114419788122,
+      "learning_rate": 2.3333333333333336e-06,
+      "loss": 0.0001,
+      "step": 3950
+    },
+    {
+      "epoch": 54.08163265306123,
+      "grad_norm": 0.002635551616549492,
+      "learning_rate": 2.277777777777778e-06,
+      "loss": 0.0001,
+      "step": 3975
+    },
+    {
+      "epoch": 54.421768707483,
+      "grad_norm": 0.002480172784999013,
+      "learning_rate": 2.222222222222222e-06,
+      "loss": 0.0001,
+      "step": 4000
+    },
+    {
+      "epoch": 54.421768707483,
+      "eval_loss": 0.4667165279388428,
+      "eval_runtime": 102.3194,
+      "eval_samples_per_second": 2.541,
+      "eval_steps_per_second": 0.166,
+      "eval_wer": 0.21596419246549795,
+      "step": 4000
+    },
+    {
+      "epoch": 54.76190476190476,
+      "grad_norm": 0.00236712908372283,
+      "learning_rate": 2.166666666666667e-06,
+      "loss": 0.0001,
+      "step": 4025
+    },
+    {
+      "epoch": 55.10204081632653,
+      "grad_norm": 0.002560981782153249,
+      "learning_rate": 2.1111111111111114e-06,
+      "loss": 0.0001,
+      "step": 4050
+    },
+    {
+      "epoch": 55.4421768707483,
+      "grad_norm": 0.002310627605766058,
+      "learning_rate": 2.0555555555555555e-06,
+      "loss": 0.0001,
+      "step": 4075
+    },
+    {
+      "epoch": 55.78231292517007,
+      "grad_norm": 0.0023427396081387997,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.0001,
+      "step": 4100
+    },
+    {
+      "epoch": 56.12244897959184,
+      "grad_norm": 0.002552021760493517,
+      "learning_rate": 1.944444444444445e-06,
+      "loss": 0.0001,
+      "step": 4125
+    },
+    {
+      "epoch": 56.46258503401361,
+      "grad_norm": 0.0021091082599014044,
+      "learning_rate": 1.888888888888889e-06,
+      "loss": 0.0001,
+      "step": 4150
+    },
+    {
+      "epoch": 56.802721088435376,
+      "grad_norm": 0.002378846053034067,
+      "learning_rate": 1.8333333333333333e-06,
+      "loss": 0.0001,
+      "step": 4175
+    },
+    {
+      "epoch": 57.142857142857146,
+      "grad_norm": 0.0022475309669971466,
+      "learning_rate": 1.777777777777778e-06,
+      "loss": 0.0001,
+      "step": 4200
+    },
+    {
+      "epoch": 57.48299319727891,
+      "grad_norm": 0.0024991375394165516,
+      "learning_rate": 1.7222222222222224e-06,
+      "loss": 0.0001,
+      "step": 4225
+    },
+    {
+      "epoch": 57.82312925170068,
+      "grad_norm": 0.0026695560663938522,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 0.0001,
+      "step": 4250
+    },
+    {
+      "epoch": 58.16326530612245,
+      "grad_norm": 0.0023107912857085466,
+      "learning_rate": 1.6111111111111113e-06,
+      "loss": 0.0001,
+      "step": 4275
+    },
+    {
+      "epoch": 58.50340136054422,
+      "grad_norm": 0.0022581815719604492,
+      "learning_rate": 1.5555555555555558e-06,
+      "loss": 0.0001,
+      "step": 4300
+    },
+    {
+      "epoch": 58.843537414965986,
+      "grad_norm": 0.0021738242357969284,
+      "learning_rate": 1.5e-06,
+      "loss": 0.0001,
+      "step": 4325
+    },
+    {
+      "epoch": 59.183673469387756,
+      "grad_norm": 0.0025273971259593964,
+      "learning_rate": 1.4444444444444445e-06,
+      "loss": 0.0001,
+      "step": 4350
+    },
+    {
+      "epoch": 59.523809523809526,
+      "grad_norm": 0.0021680588833987713,
+      "learning_rate": 1.3888888888888892e-06,
+      "loss": 0.0001,
+      "step": 4375
+    },
+    {
+      "epoch": 59.863945578231295,
+      "grad_norm": 0.0023293830454349518,
+      "learning_rate": 1.3333333333333334e-06,
+      "loss": 0.0001,
+      "step": 4400
+    },
+    {
+      "epoch": 60.204081632653065,
+      "grad_norm": 0.0019948012195527554,
+      "learning_rate": 1.2777777777777779e-06,
+      "loss": 0.0001,
+      "step": 4425
+    },
+    {
+      "epoch": 60.54421768707483,
+      "grad_norm": 0.00274069607257843,
+      "learning_rate": 1.2222222222222223e-06,
+      "loss": 0.0001,
+      "step": 4450
+    },
+    {
+      "epoch": 60.8843537414966,
+      "grad_norm": 0.002070850459858775,
+      "learning_rate": 1.1666666666666668e-06,
+      "loss": 0.0001,
+      "step": 4475
+    },
+    {
+      "epoch": 61.224489795918366,
+      "grad_norm": 0.002111976034939289,
+      "learning_rate": 1.111111111111111e-06,
+      "loss": 0.0001,
+      "step": 4500
+    },
+    {
+      "epoch": 61.564625850340136,
+      "grad_norm": 0.002158733317628503,
+      "learning_rate": 1.0555555555555557e-06,
+      "loss": 0.0001,
+      "step": 4525
+    },
+    {
+      "epoch": 61.904761904761905,
+      "grad_norm": 0.002263331552967429,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.0001,
+      "step": 4550
+    },
+    {
+      "epoch": 62.244897959183675,
+      "grad_norm": 0.0020453669130802155,
+      "learning_rate": 9.444444444444445e-07,
+      "loss": 0.0001,
+      "step": 4575
+    },
+    {
+      "epoch": 62.585034013605444,
+      "grad_norm": 0.0018243337981402874,
+      "learning_rate": 8.88888888888889e-07,
+      "loss": 0.0001,
+      "step": 4600
+    },
+    {
+      "epoch": 62.925170068027214,
+      "grad_norm": 0.0023292056284844875,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 0.0001,
+      "step": 4625
+    },
+    {
+      "epoch": 63.265306122448976,
+      "grad_norm": 0.0019000651082023978,
+      "learning_rate": 7.777777777777779e-07,
+      "loss": 0.0001,
+      "step": 4650
+    },
+    {
+      "epoch": 63.605442176870746,
+      "grad_norm": 0.002300037071108818,
+      "learning_rate": 7.222222222222222e-07,
+      "loss": 0.0001,
+      "step": 4675
+    },
+    {
+      "epoch": 63.945578231292515,
+      "grad_norm": 0.0020359556656330824,
+      "learning_rate": 6.666666666666667e-07,
+      "loss": 0.0001,
+      "step": 4700
+    },
+    {
+      "epoch": 64.28571428571429,
+      "grad_norm": 0.0019380120793357491,
+      "learning_rate": 6.111111111111112e-07,
+      "loss": 0.0001,
+      "step": 4725
+    },
+    {
+      "epoch": 64.62585034013605,
+      "grad_norm": 0.0018156260484829545,
+      "learning_rate": 5.555555555555555e-07,
+      "loss": 0.0001,
+      "step": 4750
+    },
+    {
+      "epoch": 64.96598639455782,
+      "grad_norm": 0.0021546927746385336,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 0.0001,
+      "step": 4775
+    },
+    {
+      "epoch": 65.3061224489796,
+      "grad_norm": 0.0019719544798135757,
+      "learning_rate": 4.444444444444445e-07,
+      "loss": 0.0001,
+      "step": 4800
+    },
+    {
+      "epoch": 65.64625850340136,
+      "grad_norm": 0.0018750294111669064,
+      "learning_rate": 3.8888888888888895e-07,
+      "loss": 0.0001,
+      "step": 4825
+    },
+    {
+      "epoch": 65.98639455782313,
+      "grad_norm": 0.002267962321639061,
+      "learning_rate": 3.3333333333333335e-07,
+      "loss": 0.0001,
+      "step": 4850
+    },
+    {
+      "epoch": 66.3265306122449,
+      "grad_norm": 0.0019681702833622694,
+      "learning_rate": 2.7777777777777776e-07,
+      "loss": 0.0001,
+      "step": 4875
+    },
+    {
+      "epoch": 66.66666666666667,
+      "grad_norm": 0.0022395187988877296,
+      "learning_rate": 2.2222222222222224e-07,
+      "loss": 0.0001,
+      "step": 4900
+    },
+    {
+      "epoch": 67.00680272108843,
+      "grad_norm": 0.0033925268799066544,
+      "learning_rate": 1.6666666666666668e-07,
+      "loss": 0.0001,
+      "step": 4925
+    },
+    {
+      "epoch": 67.34693877551021,
+      "grad_norm": 0.0019715563394129276,
+      "learning_rate": 1.1111111111111112e-07,
+      "loss": 0.0001,
+      "step": 4950
+    },
+    {
+      "epoch": 67.68707482993197,
+      "grad_norm": 0.00179803348146379,
+      "learning_rate": 5.555555555555556e-08,
+      "loss": 0.0001,
+      "step": 4975
+    },
+    {
+      "epoch": 68.02721088435374,
+      "grad_norm": 0.0019662685226649046,
+      "learning_rate": 0.0,
+      "loss": 0.0001,
+      "step": 5000
+    },
+    {
+      "epoch": 68.02721088435374,
+      "eval_loss": 0.47325974702835083,
+      "eval_runtime": 102.3255,
+      "eval_samples_per_second": 2.541,
+      "eval_steps_per_second": 0.166,
+      "eval_wer": 0.21372622155911974,
+      "step": 5000
+    },
+    {
+      "epoch": 68.02721088435374,
+      "step": 5000,
+      "total_flos": 5.4059634054660096e+20,
+      "train_loss": 0.028010966634354553,
+      "train_runtime": 44897.3386,
+      "train_samples_per_second": 3.564,
+      "train_steps_per_second": 0.111
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 69,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.4059634054660096e+20,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}