End of training

Browse files

Files changed (7) hide show

README.md +17 -4
all_results.json +11 -10
eval_results.json +6 -6
train_results.json +6 -5
trainer_state.json +1707 -623
wandb/run-20250212_152709-lejyafmi/files/output.log +168 -0
wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb +2 -2

README.md CHANGED Viewed

@@ -3,20 +3,33 @@ library_name: transformers
 license: apache-2.0
 base_model: openai/whisper-small
 tags:
 - generated_from_trainer
 metrics:
 - wer
 model-index:
-- name: openai/whisper-small
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# openai/whisper-small
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1836
 - Wer: 10.8862

 license: apache-2.0
 base_model: openai/whisper-small
 tags:
+- whisper-event
 - generated_from_trainer
+datasets:
+- asierhv/composite_corpus_eu_v2.1
 metrics:
 - wer
 model-index:
+- name: Whisper Small Basque
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: asierhv/composite_corpus_eu_v2.1
+      type: asierhv/composite_corpus_eu_v2.1
+    metrics:
+    - name: Wer
+      type: wer
+      value: 10.886229784051602
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Whisper Small Basque
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the asierhv/composite_corpus_eu_v2.1 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1836
 - Wer: 10.8862

all_results.json CHANGED Viewed

@@ -1,12 +1,13 @@
 {
-    "epoch": 6.06,
-    "eval_loss": 0.19964517652988434,
-    "eval_runtime": 1715.2587,
-    "eval_samples_per_second": 7.296,
-    "eval_steps_per_second": 0.456,
-    "eval_wer": 12.012786552211754,
-    "train_loss": 0.15837200704216958,
-    "train_runtime": 29005.3522,
-    "train_samples_per_second": 5.516,
-    "train_steps_per_second": 0.172
 }

 {
+    "epoch": 1.0,
+    "eval_loss": 0.1835634410381317,
+    "eval_runtime": 151.3822,
+    "eval_samples_per_second": 13.899,
+    "eval_steps_per_second": 0.872,
+    "eval_wer": 10.886229784051602,
+    "total_flos": 7.387786248192e+19,
+    "train_loss": 0.17036041705310345,
+    "train_runtime": 11036.9074,
+    "train_samples_per_second": 23.195,
+    "train_steps_per_second": 0.725
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 6.06,
-    "eval_loss": 0.19964517652988434,
-    "eval_runtime": 1715.2587,
-    "eval_samples_per_second": 7.296,
-    "eval_steps_per_second": 0.456,
-    "eval_wer": 12.012786552211754
 }

 {
+    "epoch": 1.0,
+    "eval_loss": 0.1835634410381317,
+    "eval_runtime": 151.3822,
+    "eval_samples_per_second": 13.899,
+    "eval_steps_per_second": 0.872,
+    "eval_wer": 10.886229784051602
 }

train_results.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
-    "epoch": 6.06,
-    "train_loss": 0.15837200704216958,
-    "train_runtime": 29005.3522,
-    "train_samples_per_second": 5.516,
-    "train_steps_per_second": 0.172
 }

 {
+    "epoch": 1.0,
+    "total_flos": 7.387786248192e+19,
+    "train_loss": 0.17036041705310345,
+    "train_runtime": 11036.9074,
+    "train_samples_per_second": 23.195,
+    "train_steps_per_second": 0.725
 }

trainer_state.json CHANGED Viewed

@@ -1,1270 +1,2354 @@
 {
-  "best_metric": 12.012786552211754,
-  "best_model_checkpoint": "./checkpoint-5000",
-  "epoch": 6.0604,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.01,
       "learning_rate": 4.4e-07,
-      "loss": 1.6183,
       "step": 25
     },
     {
-      "epoch": 0.01,
       "learning_rate": 9.400000000000001e-07,
-      "loss": 1.4856,
       "step": 50
     },
     {
-      "epoch": 0.01,
       "learning_rate": 1.44e-06,
-      "loss": 1.1898,
       "step": 75
     },
     {
-      "epoch": 0.02,
       "learning_rate": 1.94e-06,
-      "loss": 0.9354,
       "step": 100
     },
     {
-      "epoch": 0.03,
       "learning_rate": 2.4400000000000004e-06,
-      "loss": 0.8172,
       "step": 125
     },
     {
-      "epoch": 0.03,
       "learning_rate": 2.9400000000000002e-06,
-      "loss": 0.7386,
       "step": 150
     },
     {
-      "epoch": 0.04,
       "learning_rate": 3.44e-06,
-      "loss": 0.6768,
       "step": 175
     },
     {
-      "epoch": 0.04,
       "learning_rate": 3.94e-06,
-      "loss": 0.6069,
       "step": 200
     },
     {
-      "epoch": 0.04,
       "learning_rate": 4.440000000000001e-06,
-      "loss": 0.5739,
       "step": 225
     },
     {
-      "epoch": 0.05,
       "learning_rate": 4.94e-06,
-      "loss": 0.5269,
       "step": 250
     },
     {
-      "epoch": 0.06,
       "learning_rate": 5.4400000000000004e-06,
-      "loss": 0.5011,
       "step": 275
     },
     {
-      "epoch": 0.06,
       "learning_rate": 5.94e-06,
-      "loss": 0.4842,
       "step": 300
     },
     {
-      "epoch": 0.07,
       "learning_rate": 6.440000000000001e-06,
-      "loss": 0.4698,
       "step": 325
     },
     {
-      "epoch": 0.07,
       "learning_rate": 6.9400000000000005e-06,
-      "loss": 0.446,
       "step": 350
     },
     {
-      "epoch": 0.07,
       "learning_rate": 7.440000000000001e-06,
-      "loss": 0.4378,
       "step": 375
     },
     {
-      "epoch": 0.08,
       "learning_rate": 7.94e-06,
-      "loss": 0.3966,
       "step": 400
     },
     {
-      "epoch": 0.09,
       "learning_rate": 8.44e-06,
-      "loss": 0.3916,
       "step": 425
     },
     {
-      "epoch": 0.09,
       "learning_rate": 8.94e-06,
-      "loss": 0.3746,
       "step": 450
     },
     {
-      "epoch": 0.1,
       "learning_rate": 9.440000000000001e-06,
-      "loss": 0.3372,
       "step": 475
     },
     {
-      "epoch": 0.1,
       "learning_rate": 9.940000000000001e-06,
-      "loss": 0.329,
       "step": 500
     },
     {
-      "epoch": 0.1,
-      "learning_rate": 9.951111111111111e-06,
-      "loss": 0.3364,
       "step": 525
     },
     {
-      "epoch": 0.11,
-      "learning_rate": 9.895555555555557e-06,
-      "loss": 0.3074,
       "step": 550
     },
     {
-      "epoch": 0.12,
-      "learning_rate": 9.84e-06,
-      "loss": 0.3134,
       "step": 575
     },
     {
-      "epoch": 0.12,
-      "learning_rate": 9.784444444444445e-06,
-      "loss": 0.348,
       "step": 600
     },
     {
-      "epoch": 0.12,
-      "learning_rate": 9.72888888888889e-06,
-      "loss": 0.3037,
       "step": 625
     },
     {
-      "epoch": 0.13,
-      "learning_rate": 9.673333333333334e-06,
-      "loss": 0.2768,
       "step": 650
     },
     {
-      "epoch": 0.14,
-      "learning_rate": 9.617777777777778e-06,
-      "loss": 0.3341,
       "step": 675
     },
     {
-      "epoch": 0.14,
-      "learning_rate": 9.562222222222223e-06,
-      "loss": 0.3203,
       "step": 700
     },
     {
-      "epoch": 0.14,
-      "learning_rate": 9.506666666666667e-06,
-      "loss": 0.2681,
       "step": 725
     },
     {
-      "epoch": 0.15,
-      "learning_rate": 9.451111111111112e-06,
-      "loss": 0.2906,
       "step": 750
     },
     {
-      "epoch": 0.15,
-      "learning_rate": 9.395555555555556e-06,
-      "loss": 0.2918,
       "step": 775
     },
     {
-      "epoch": 1.0,
-      "learning_rate": 9.340000000000002e-06,
-      "loss": 0.2697,
       "step": 800
     },
     {
-      "epoch": 1.01,
-      "learning_rate": 9.284444444444444e-06,
-      "loss": 0.2868,
       "step": 825
     },
     {
-      "epoch": 1.01,
-      "learning_rate": 9.22888888888889e-06,
-      "loss": 0.2654,
       "step": 850
     },
     {
-      "epoch": 1.02,
-      "learning_rate": 9.173333333333334e-06,
-      "loss": 0.2549,
       "step": 875
     },
     {
-      "epoch": 1.02,
-      "learning_rate": 9.117777777777778e-06,
-      "loss": 0.2508,
       "step": 900
     },
     {
-      "epoch": 1.03,
-      "learning_rate": 9.062222222222224e-06,
-      "loss": 0.2285,
       "step": 925
     },
     {
-      "epoch": 1.03,
-      "learning_rate": 9.006666666666666e-06,
-      "loss": 0.2362,
       "step": 950
     },
     {
-      "epoch": 1.04,
-      "learning_rate": 8.951111111111112e-06,
-      "loss": 0.2239,
       "step": 975
     },
     {
-      "epoch": 1.04,
-      "learning_rate": 8.895555555555556e-06,
-      "loss": 0.2009,
       "step": 1000
     },
     {
-      "epoch": 1.04,
-      "eval_loss": 0.24461345374584198,
-      "eval_runtime": 1705.6758,
-      "eval_samples_per_second": 7.337,
-      "eval_steps_per_second": 0.459,
-      "eval_wer": 17.688063892927563,
       "step": 1000
     },
     {
-      "epoch": 1.05,
-      "learning_rate": 8.84e-06,
-      "loss": 0.196,
       "step": 1025
     },
     {
-      "epoch": 1.05,
-      "learning_rate": 8.784444444444446e-06,
-      "loss": 0.2057,
       "step": 1050
     },
     {
-      "epoch": 1.06,
-      "learning_rate": 8.72888888888889e-06,
-      "loss": 0.2115,
       "step": 1075
     },
     {
-      "epoch": 1.06,
-      "learning_rate": 8.673333333333334e-06,
-      "loss": 0.1891,
       "step": 1100
     },
     {
-      "epoch": 1.07,
-      "learning_rate": 8.617777777777778e-06,
-      "loss": 0.1985,
       "step": 1125
     },
     {
-      "epoch": 1.07,
-      "learning_rate": 8.562222222222224e-06,
-      "loss": 0.184,
       "step": 1150
     },
     {
-      "epoch": 1.08,
-      "learning_rate": 8.506666666666668e-06,
-      "loss": 0.1581,
       "step": 1175
     },
     {
-      "epoch": 1.08,
-      "learning_rate": 8.451111111111112e-06,
-      "loss": 0.1609,
       "step": 1200
     },
     {
-      "epoch": 1.09,
-      "learning_rate": 8.395555555555557e-06,
-      "loss": 0.1528,
       "step": 1225
     },
     {
-      "epoch": 1.09,
-      "learning_rate": 8.34e-06,
-      "loss": 0.1387,
       "step": 1250
     },
     {
-      "epoch": 1.1,
-      "learning_rate": 8.284444444444446e-06,
-      "loss": 0.1312,
       "step": 1275
     },
     {
-      "epoch": 1.1,
-      "learning_rate": 8.22888888888889e-06,
-      "loss": 0.1471,
       "step": 1300
     },
     {
-      "epoch": 1.11,
-      "learning_rate": 8.173333333333334e-06,
-      "loss": 0.1208,
       "step": 1325
     },
     {
-      "epoch": 1.11,
-      "learning_rate": 8.11777777777778e-06,
-      "loss": 0.1387,
       "step": 1350
     },
     {
-      "epoch": 1.12,
-      "learning_rate": 8.062222222222222e-06,
-      "loss": 0.1393,
       "step": 1375
     },
     {
-      "epoch": 1.12,
-      "learning_rate": 8.006666666666667e-06,
-      "loss": 0.1347,
       "step": 1400
     },
     {
-      "epoch": 1.13,
-      "learning_rate": 7.951111111111111e-06,
-      "loss": 0.1278,
       "step": 1425
     },
     {
-      "epoch": 1.13,
-      "learning_rate": 7.895555555555557e-06,
-      "loss": 0.1528,
       "step": 1450
     },
     {
-      "epoch": 1.14,
-      "learning_rate": 7.840000000000001e-06,
-      "loss": 0.1614,
       "step": 1475
     },
     {
-      "epoch": 1.14,
-      "learning_rate": 7.784444444444445e-06,
-      "loss": 0.1255,
       "step": 1500
     },
     {
-      "epoch": 1.15,
-      "learning_rate": 7.72888888888889e-06,
-      "loss": 0.138,
       "step": 1525
     },
     {
-      "epoch": 1.15,
-      "learning_rate": 7.673333333333333e-06,
-      "loss": 0.1341,
       "step": 1550
     },
     {
-      "epoch": 2.0,
-      "learning_rate": 7.617777777777778e-06,
-      "loss": 0.1223,
       "step": 1575
     },
     {
-      "epoch": 2.01,
-      "learning_rate": 7.562222222222223e-06,
-      "loss": 0.1272,
       "step": 1600
     },
     {
-      "epoch": 2.01,
-      "learning_rate": 7.506666666666668e-06,
-      "loss": 0.1324,
       "step": 1625
     },
     {
-      "epoch": 2.02,
-      "learning_rate": 7.451111111111111e-06,
-      "loss": 0.1277,
       "step": 1650
     },
     {
-      "epoch": 2.02,
-      "learning_rate": 7.395555555555556e-06,
-      "loss": 0.1184,
       "step": 1675
     },
     {
-      "epoch": 2.03,
-      "learning_rate": 7.340000000000001e-06,
-      "loss": 0.1117,
       "step": 1700
     },
     {
-      "epoch": 2.03,
-      "learning_rate": 7.284444444444445e-06,
-      "loss": 0.1092,
       "step": 1725
     },
     {
-      "epoch": 2.04,
-      "learning_rate": 7.22888888888889e-06,
-      "loss": 0.1027,
       "step": 1750
     },
     {
-      "epoch": 2.04,
-      "learning_rate": 7.173333333333335e-06,
-      "loss": 0.1018,
       "step": 1775
     },
     {
-      "epoch": 2.05,
-      "learning_rate": 7.117777777777778e-06,
-      "loss": 0.0909,
       "step": 1800
     },
     {
-      "epoch": 2.05,
-      "learning_rate": 7.062222222222223e-06,
-      "loss": 0.104,
       "step": 1825
     },
     {
-      "epoch": 2.06,
-      "learning_rate": 7.006666666666667e-06,
-      "loss": 0.1002,
       "step": 1850
     },
     {
-      "epoch": 2.06,
-      "learning_rate": 6.951111111111112e-06,
-      "loss": 0.0964,
       "step": 1875
     },
     {
-      "epoch": 2.07,
-      "learning_rate": 6.8955555555555565e-06,
-      "loss": 0.0874,
       "step": 1900
     },
     {
-      "epoch": 2.07,
-      "learning_rate": 6.8400000000000014e-06,
-      "loss": 0.0885,
       "step": 1925
     },
     {
-      "epoch": 2.08,
-      "learning_rate": 6.784444444444445e-06,
-      "loss": 0.084,
       "step": 1950
     },
     {
-      "epoch": 2.08,
-      "learning_rate": 6.7288888888888895e-06,
-      "loss": 0.0766,
       "step": 1975
     },
     {
-      "epoch": 2.09,
-      "learning_rate": 6.6733333333333335e-06,
-      "loss": 0.0759,
       "step": 2000
     },
     {
-      "epoch": 2.09,
-      "eval_loss": 0.21021738648414612,
-      "eval_runtime": 1689.1729,
-      "eval_samples_per_second": 7.409,
-      "eval_steps_per_second": 0.464,
-      "eval_wer": 14.258399888466212,
       "step": 2000
     },
     {
-      "epoch": 2.09,
-      "learning_rate": 6.617777777777778e-06,
-      "loss": 0.0705,
       "step": 2025
     },
     {
-      "epoch": 2.1,
-      "learning_rate": 6.562222222222223e-06,
-      "loss": 0.0634,
       "step": 2050
     },
     {
-      "epoch": 2.1,
-      "learning_rate": 6.5066666666666665e-06,
-      "loss": 0.0671,
       "step": 2075
     },
     {
-      "epoch": 2.11,
-      "learning_rate": 6.451111111111111e-06,
-      "loss": 0.0658,
       "step": 2100
     },
     {
-      "epoch": 2.11,
-      "learning_rate": 6.395555555555556e-06,
-      "loss": 0.0601,
       "step": 2125
     },
     {
-      "epoch": 2.12,
-      "learning_rate": 6.34e-06,
-      "loss": 0.0701,
       "step": 2150
     },
     {
-      "epoch": 2.12,
-      "learning_rate": 6.284444444444445e-06,
-      "loss": 0.0642,
       "step": 2175
     },
     {
-      "epoch": 2.13,
-      "learning_rate": 6.22888888888889e-06,
-      "loss": 0.0612,
       "step": 2200
     },
     {
-      "epoch": 2.13,
-      "learning_rate": 6.173333333333333e-06,
-      "loss": 0.0571,
       "step": 2225
     },
     {
-      "epoch": 2.14,
-      "learning_rate": 6.117777777777778e-06,
-      "loss": 0.1102,
       "step": 2250
     },
     {
-      "epoch": 2.14,
-      "learning_rate": 6.062222222222223e-06,
-      "loss": 0.0744,
       "step": 2275
     },
     {
-      "epoch": 2.15,
-      "learning_rate": 6.006666666666667e-06,
-      "loss": 0.0638,
       "step": 2300
     },
     {
-      "epoch": 2.15,
-      "learning_rate": 5.951111111111112e-06,
-      "loss": 0.0638,
       "step": 2325
     },
     {
-      "epoch": 3.0,
-      "learning_rate": 5.895555555555557e-06,
-      "loss": 0.0601,
       "step": 2350
     },
     {
-      "epoch": 3.01,
-      "learning_rate": 5.84e-06,
-      "loss": 0.056,
       "step": 2375
     },
     {
-      "epoch": 3.01,
-      "learning_rate": 5.784444444444445e-06,
-      "loss": 0.062,
       "step": 2400
     },
     {
-      "epoch": 3.02,
-      "learning_rate": 5.72888888888889e-06,
-      "loss": 0.0645,
       "step": 2425
     },
     {
-      "epoch": 3.02,
-      "learning_rate": 5.673333333333334e-06,
-      "loss": 0.0595,
       "step": 2450
     },
     {
-      "epoch": 3.03,
-      "learning_rate": 5.617777777777779e-06,
-      "loss": 0.0543,
       "step": 2475
     },
     {
-      "epoch": 3.03,
-      "learning_rate": 5.562222222222222e-06,
-      "loss": 0.0525,
       "step": 2500
     },
     {
-      "epoch": 3.04,
-      "learning_rate": 5.506666666666667e-06,
-      "loss": 0.0494,
       "step": 2525
     },
     {
-      "epoch": 3.04,
-      "learning_rate": 5.451111111111112e-06,
-      "loss": 0.0473,
       "step": 2550
     },
     {
-      "epoch": 3.05,
-      "learning_rate": 5.3955555555555565e-06,
-      "loss": 0.0483,
       "step": 2575
     },
     {
-      "epoch": 3.05,
-      "learning_rate": 5.3400000000000005e-06,
-      "loss": 0.0467,
       "step": 2600
     },
     {
-      "epoch": 3.06,
-      "learning_rate": 5.2844444444444454e-06,
-      "loss": 0.0503,
       "step": 2625
     },
     {
-      "epoch": 3.06,
-      "learning_rate": 5.228888888888889e-06,
-      "loss": 0.0428,
       "step": 2650
     },
     {
-      "epoch": 3.07,
-      "learning_rate": 5.1733333333333335e-06,
-      "loss": 0.0418,
       "step": 2675
     },
     {
-      "epoch": 3.07,
-      "learning_rate": 5.117777777777778e-06,
-      "loss": 0.0424,
       "step": 2700
     },
     {
-      "epoch": 3.08,
-      "learning_rate": 5.062222222222222e-06,
-      "loss": 0.0406,
       "step": 2725
     },
     {
-      "epoch": 3.08,
-      "learning_rate": 5.006666666666667e-06,
-      "loss": 0.0327,
       "step": 2750
     },
     {
-      "epoch": 3.09,
-      "learning_rate": 4.951111111111111e-06,
-      "loss": 0.0371,
       "step": 2775
     },
     {
-      "epoch": 3.09,
-      "learning_rate": 4.895555555555556e-06,
-      "loss": 0.0313,
       "step": 2800
     },
     {
-      "epoch": 3.1,
-      "learning_rate": 4.84e-06,
-      "loss": 0.0295,
       "step": 2825
     },
     {
-      "epoch": 3.1,
-      "learning_rate": 4.784444444444445e-06,
-      "loss": 0.0284,
       "step": 2850
     },
     {
-      "epoch": 3.11,
-      "learning_rate": 4.728888888888889e-06,
-      "loss": 0.0322,
       "step": 2875
     },
     {
-      "epoch": 3.11,
-      "learning_rate": 4.673333333333333e-06,
-      "loss": 0.0307,
       "step": 2900
     },
     {
-      "epoch": 3.12,
-      "learning_rate": 4.617777777777778e-06,
-      "loss": 0.0316,
       "step": 2925
     },
     {
-      "epoch": 3.12,
-      "learning_rate": 4.562222222222222e-06,
-      "loss": 0.0286,
       "step": 2950
     },
     {
-      "epoch": 3.13,
-      "learning_rate": 4.506666666666667e-06,
-      "loss": 0.0289,
       "step": 2975
     },
     {
-      "epoch": 3.13,
-      "learning_rate": 4.451111111111112e-06,
-      "loss": 0.0264,
       "step": 3000
     },
     {
-      "epoch": 3.13,
-      "eval_loss": 0.2200043797492981,
-      "eval_runtime": 1693.8446,
-      "eval_samples_per_second": 7.389,
-      "eval_steps_per_second": 0.462,
-      "eval_wer": 13.689776733254993,
       "step": 3000
     },
     {
-      "epoch": 3.14,
-      "learning_rate": 4.395555555555556e-06,
-      "loss": 0.0609,
       "step": 3025
     },
     {
-      "epoch": 3.14,
-      "learning_rate": 4.34e-06,
-      "loss": 0.0455,
       "step": 3050
     },
     {
-      "epoch": 3.15,
-      "learning_rate": 4.284444444444445e-06,
-      "loss": 0.0323,
       "step": 3075
     },
     {
-      "epoch": 3.15,
-      "learning_rate": 4.228888888888889e-06,
-      "loss": 0.0298,
       "step": 3100
     },
     {
-      "epoch": 3.16,
-      "learning_rate": 4.173333333333334e-06,
-      "loss": 0.0265,
       "step": 3125
     },
     {
-      "epoch": 4.0,
-      "learning_rate": 4.117777777777779e-06,
-      "loss": 0.1155,
       "step": 3150
     },
     {
-      "epoch": 4.01,
-      "learning_rate": 4.062222222222223e-06,
-      "loss": 0.1149,
       "step": 3175
     },
     {
-      "epoch": 4.01,
-      "learning_rate": 4.006666666666667e-06,
-      "loss": 0.1179,
       "step": 3200
     },
     {
-      "epoch": 4.02,
-      "learning_rate": 3.951111111111112e-06,
-      "loss": 0.1228,
       "step": 3225
     },
     {
-      "epoch": 4.02,
-      "learning_rate": 3.895555555555556e-06,
-      "loss": 0.1806,
       "step": 3250
     },
     {
-      "epoch": 4.03,
-      "learning_rate": 3.8400000000000005e-06,
-      "loss": 0.2425,
       "step": 3275
     },
     {
-      "epoch": 4.03,
-      "learning_rate": 3.784444444444445e-06,
-      "loss": 0.1122,
       "step": 3300
     },
     {
-      "epoch": 4.04,
-      "learning_rate": 3.728888888888889e-06,
-      "loss": 0.0986,
       "step": 3325
     },
     {
-      "epoch": 4.04,
-      "learning_rate": 3.673333333333334e-06,
-      "loss": 0.0942,
       "step": 3350
     },
     {
-      "epoch": 4.05,
-      "learning_rate": 3.617777777777778e-06,
-      "loss": 0.1099,
       "step": 3375
     },
     {
-      "epoch": 4.05,
-      "learning_rate": 3.5622222222222224e-06,
-      "loss": 0.1332,
       "step": 3400
     },
     {
-      "epoch": 4.06,
-      "learning_rate": 3.5066666666666673e-06,
-      "loss": 0.1368,
       "step": 3425
     },
     {
-      "epoch": 4.06,
-      "learning_rate": 3.4511111111111113e-06,
-      "loss": 0.164,
       "step": 3450
     },
     {
-      "epoch": 4.07,
-      "learning_rate": 3.3955555555555558e-06,
-      "loss": 0.174,
       "step": 3475
     },
     {
-      "epoch": 4.07,
-      "learning_rate": 3.3400000000000006e-06,
-      "loss": 0.1675,
       "step": 3500
     },
     {
-      "epoch": 4.08,
-      "learning_rate": 3.2844444444444447e-06,
-      "loss": 0.0996,
       "step": 3525
     },
     {
-      "epoch": 4.08,
-      "learning_rate": 3.228888888888889e-06,
-      "loss": 0.0795,
       "step": 3550
     },
     {
-      "epoch": 4.09,
-      "learning_rate": 3.173333333333334e-06,
-      "loss": 0.0683,
       "step": 3575
     },
     {
-      "epoch": 4.09,
-      "learning_rate": 3.117777777777778e-06,
-      "loss": 0.0635,
       "step": 3600
     },
     {
-      "epoch": 4.1,
-      "learning_rate": 3.0622222222222225e-06,
-      "loss": 0.0751,
       "step": 3625
     },
     {
-      "epoch": 4.1,
-      "learning_rate": 3.0066666666666674e-06,
-      "loss": 0.0712,
       "step": 3650
     },
     {
-      "epoch": 4.11,
-      "learning_rate": 2.9511111111111114e-06,
-      "loss": 0.0703,
       "step": 3675
     },
     {
-      "epoch": 4.11,
-      "learning_rate": 2.895555555555556e-06,
-      "loss": 0.1152,
       "step": 3700
     },
     {
-      "epoch": 4.12,
-      "learning_rate": 2.84e-06,
-      "loss": 0.1039,
       "step": 3725
     },
     {
-      "epoch": 4.12,
-      "learning_rate": 2.784444444444445e-06,
-      "loss": 0.0863,
       "step": 3750
     },
     {
-      "epoch": 4.13,
-      "learning_rate": 2.7288888888888893e-06,
-      "loss": 0.0882,
       "step": 3775
     },
     {
-      "epoch": 4.13,
-      "learning_rate": 2.6733333333333333e-06,
-      "loss": 0.0677,
       "step": 3800
     },
     {
-      "epoch": 4.14,
-      "learning_rate": 2.617777777777778e-06,
-      "loss": 0.0511,
       "step": 3825
     },
     {
-      "epoch": 4.14,
-      "learning_rate": 2.5622222222222226e-06,
-      "loss": 0.0283,
       "step": 3850
     },
     {
-      "epoch": 4.15,
-      "learning_rate": 2.5066666666666667e-06,
-      "loss": 0.0246,
       "step": 3875
     },
     {
-      "epoch": 4.15,
-      "learning_rate": 2.451111111111111e-06,
-      "loss": 0.0231,
       "step": 3900
     },
     {
-      "epoch": 5.0,
-      "learning_rate": 2.3955555555555556e-06,
-      "loss": 0.0442,
       "step": 3925
     },
     {
-      "epoch": 5.01,
-      "learning_rate": 2.3400000000000005e-06,
-      "loss": 0.063,
       "step": 3950
     },
     {
-      "epoch": 5.01,
-      "learning_rate": 2.2844444444444445e-06,
-      "loss": 0.0548,
       "step": 3975
     },
     {
-      "epoch": 5.02,
-      "learning_rate": 2.228888888888889e-06,
-      "loss": 0.0633,
       "step": 4000
     },
     {
-      "epoch": 5.02,
-      "eval_loss": 0.1954876184463501,
-      "eval_runtime": 1710.419,
-      "eval_samples_per_second": 7.317,
-      "eval_steps_per_second": 0.458,
-      "eval_wer": 12.553526260232228,
       "step": 4000
     },
     {
-      "epoch": 5.02,
-      "learning_rate": 2.1733333333333334e-06,
-      "loss": 0.0801,
       "step": 4025
     },
     {
-      "epoch": 5.03,
-      "learning_rate": 2.117777777777778e-06,
-      "loss": 0.1296,
       "step": 4050
     },
     {
-      "epoch": 5.03,
-      "learning_rate": 2.0622222222222223e-06,
-      "loss": 0.0944,
       "step": 4075
     },
     {
-      "epoch": 5.04,
-      "learning_rate": 2.006666666666667e-06,
-      "loss": 0.0541,
       "step": 4100
     },
     {
-      "epoch": 5.04,
-      "learning_rate": 1.9511111111111113e-06,
-      "loss": 0.0511,
       "step": 4125
     },
     {
-      "epoch": 5.05,
-      "learning_rate": 1.8955555555555557e-06,
-      "loss": 0.0524,
       "step": 4150
     },
     {
-      "epoch": 5.05,
-      "learning_rate": 1.8400000000000002e-06,
-      "loss": 0.0656,
       "step": 4175
     },
     {
-      "epoch": 5.06,
-      "learning_rate": 1.7844444444444444e-06,
-      "loss": 0.0675,
       "step": 4200
     },
     {
-      "epoch": 5.06,
-      "learning_rate": 1.728888888888889e-06,
-      "loss": 0.0916,
       "step": 4225
     },
     {
-      "epoch": 5.07,
-      "learning_rate": 1.6733333333333335e-06,
-      "loss": 0.0977,
       "step": 4250
     },
     {
-      "epoch": 5.07,
-      "learning_rate": 1.6177777777777778e-06,
-      "loss": 0.108,
       "step": 4275
     },
     {
-      "epoch": 5.08,
-      "learning_rate": 1.5622222222222225e-06,
-      "loss": 0.0672,
       "step": 4300
     },
     {
-      "epoch": 5.08,
-      "learning_rate": 1.506666666666667e-06,
-      "loss": 0.0444,
       "step": 4325
     },
     {
-      "epoch": 5.09,
-      "learning_rate": 1.4511111111111112e-06,
-      "loss": 0.0371,
       "step": 4350
     },
     {
-      "epoch": 5.09,
-      "learning_rate": 1.3955555555555556e-06,
-      "loss": 0.0368,
       "step": 4375
     },
     {
-      "epoch": 5.1,
-      "learning_rate": 1.34e-06,
-      "loss": 0.038,
       "step": 4400
     },
     {
-      "epoch": 5.1,
-      "learning_rate": 1.2844444444444445e-06,
-      "loss": 0.0366,
       "step": 4425
     },
     {
-      "epoch": 5.11,
-      "learning_rate": 1.228888888888889e-06,
-      "loss": 0.041,
       "step": 4450
     },
     {
-      "epoch": 5.11,
-      "learning_rate": 1.1733333333333335e-06,
-      "loss": 0.0613,
       "step": 4475
     },
     {
-      "epoch": 5.12,
-      "learning_rate": 1.117777777777778e-06,
-      "loss": 0.06,
       "step": 4500
     },
     {
-      "epoch": 5.12,
-      "learning_rate": 1.0622222222222222e-06,
-      "loss": 0.0519,
       "step": 4525
     },
     {
-      "epoch": 5.13,
-      "learning_rate": 1.0066666666666668e-06,
-      "loss": 0.0475,
       "step": 4550
     },
     {
-      "epoch": 5.13,
-      "learning_rate": 9.511111111111111e-07,
-      "loss": 0.0328,
       "step": 4575
     },
     {
-      "epoch": 5.14,
-      "learning_rate": 8.955555555555557e-07,
-      "loss": 0.046,
       "step": 4600
     },
     {
-      "epoch": 5.14,
-      "learning_rate": 8.400000000000001e-07,
-      "loss": 0.0184,
       "step": 4625
     },
     {
-      "epoch": 5.15,
-      "learning_rate": 7.844444444444445e-07,
-      "loss": 0.014,
       "step": 4650
     },
     {
-      "epoch": 5.15,
-      "learning_rate": 7.28888888888889e-07,
-      "loss": 0.0138,
       "step": 4675
     },
     {
-      "epoch": 6.0,
-      "learning_rate": 6.733333333333334e-07,
-      "loss": 0.0116,
       "step": 4700
     },
     {
-      "epoch": 6.01,
-      "learning_rate": 6.177777777777778e-07,
-      "loss": 0.014,
       "step": 4725
     },
     {
-      "epoch": 6.01,
-      "learning_rate": 5.622222222222223e-07,
-      "loss": 0.0156,
       "step": 4750
     },
     {
-      "epoch": 6.02,
-      "learning_rate": 5.066666666666667e-07,
-      "loss": 0.0136,
       "step": 4775
     },
     {
-      "epoch": 6.02,
-      "learning_rate": 4.511111111111111e-07,
-      "loss": 0.0144,
       "step": 4800
     },
     {
-      "epoch": 6.03,
-      "learning_rate": 3.9555555555555557e-07,
-      "loss": 0.0155,
       "step": 4825
     },
     {
-      "epoch": 6.03,
-      "learning_rate": 3.4000000000000003e-07,
-      "loss": 0.0187,
       "step": 4850
     },
     {
-      "epoch": 6.04,
-      "learning_rate": 2.844444444444445e-07,
-      "loss": 0.0189,
       "step": 4875
     },
     {
-      "epoch": 6.04,
-      "learning_rate": 2.2888888888888892e-07,
-      "loss": 0.0187,
       "step": 4900
     },
     {
-      "epoch": 6.05,
-      "learning_rate": 1.7333333333333335e-07,
-      "loss": 0.0194,
       "step": 4925
     },
     {
-      "epoch": 6.05,
-      "learning_rate": 1.1777777777777778e-07,
-      "loss": 0.019,
       "step": 4950
     },
     {
-      "epoch": 6.06,
-      "learning_rate": 6.222222222222223e-08,
-      "loss": 0.0183,
       "step": 4975
     },
     {
-      "epoch": 6.06,
-      "learning_rate": 6.666666666666667e-09,
-      "loss": 0.0199,
       "step": 5000
     },
     {
-      "epoch": 6.06,
-      "eval_loss": 0.19964517652988434,
-      "eval_runtime": 1705.0556,
-      "eval_samples_per_second": 7.34,
-      "eval_steps_per_second": 0.459,
-      "eval_wer": 12.012786552211754,
       "step": 5000
     },
     {
-      "epoch": 6.06,
-      "step": 5000,
-      "total_flos": 4.612864472875008e+19,
-      "train_loss": 0.15837200704216958,
-      "train_runtime": 29005.3522,
-      "train_samples_per_second": 5.516,
-      "train_steps_per_second": 0.172
     }
   ],
-  "max_steps": 5000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 4.612864472875008e+19,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 10.886229784051602,
+  "best_model_checkpoint": "./checkpoint-8000",
+  "epoch": 1.0,
+  "eval_steps": 1000,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.003125,
+      "grad_norm": 17.579944610595703,
       "learning_rate": 4.4e-07,
+      "loss": 2.3284,
       "step": 25
     },
     {
+      "epoch": 0.00625,
+      "grad_norm": 9.753120422363281,
       "learning_rate": 9.400000000000001e-07,
+      "loss": 1.9145,
       "step": 50
     },
     {
+      "epoch": 0.009375,
+      "grad_norm": 9.469987869262695,
       "learning_rate": 1.44e-06,
+      "loss": 1.2892,
       "step": 75
     },
     {
+      "epoch": 0.0125,
+      "grad_norm": 6.952774524688721,
       "learning_rate": 1.94e-06,
+      "loss": 0.9797,
       "step": 100
     },
     {
+      "epoch": 0.015625,
+      "grad_norm": 6.080902576446533,
       "learning_rate": 2.4400000000000004e-06,
+      "loss": 0.8265,
       "step": 125
     },
     {
+      "epoch": 0.01875,
+      "grad_norm": 5.6766037940979,
       "learning_rate": 2.9400000000000002e-06,
+      "loss": 0.6998,
       "step": 150
     },
     {
+      "epoch": 0.021875,
+      "grad_norm": 5.372249126434326,
       "learning_rate": 3.44e-06,
+      "loss": 0.6537,
       "step": 175
     },
     {
+      "epoch": 0.025,
+      "grad_norm": 5.710323810577393,
       "learning_rate": 3.94e-06,
+      "loss": 0.6149,
       "step": 200
     },
     {
+      "epoch": 0.028125,
+      "grad_norm": 5.235953330993652,
       "learning_rate": 4.440000000000001e-06,
+      "loss": 0.5256,
       "step": 225
     },
     {
+      "epoch": 0.03125,
+      "grad_norm": 6.58635950088501,
       "learning_rate": 4.94e-06,
+      "loss": 0.54,
       "step": 250
     },
     {
+      "epoch": 0.034375,
+      "grad_norm": 5.4912004470825195,
       "learning_rate": 5.4400000000000004e-06,
+      "loss": 0.5521,
       "step": 275
     },
     {
+      "epoch": 0.0375,
+      "grad_norm": 5.846869945526123,
       "learning_rate": 5.94e-06,
+      "loss": 0.5379,
       "step": 300
     },
     {
+      "epoch": 0.040625,
+      "grad_norm": 5.060309410095215,
       "learning_rate": 6.440000000000001e-06,
+      "loss": 0.4778,
       "step": 325
     },
     {
+      "epoch": 0.04375,
+      "grad_norm": 5.06487512588501,
       "learning_rate": 6.9400000000000005e-06,
+      "loss": 0.4152,
       "step": 350
     },
     {
+      "epoch": 0.046875,
+      "grad_norm": 4.936045169830322,
       "learning_rate": 7.440000000000001e-06,
+      "loss": 0.3547,
       "step": 375
     },
     {
+      "epoch": 0.05,
+      "grad_norm": 3.8072471618652344,
       "learning_rate": 7.94e-06,
+      "loss": 0.3428,
       "step": 400
     },
     {
+      "epoch": 0.053125,
+      "grad_norm": 3.9378795623779297,
       "learning_rate": 8.44e-06,
+      "loss": 0.3099,
       "step": 425
     },
     {
+      "epoch": 0.05625,
+      "grad_norm": 3.732869863510132,
       "learning_rate": 8.94e-06,
+      "loss": 0.2963,
       "step": 450
     },
     {
+      "epoch": 0.059375,
+      "grad_norm": 3.9596025943756104,
       "learning_rate": 9.440000000000001e-06,
+      "loss": 0.2745,
       "step": 475
     },
     {
+      "epoch": 0.0625,
+      "grad_norm": 3.428398370742798,
       "learning_rate": 9.940000000000001e-06,
+      "loss": 0.2626,
       "step": 500
     },
     {
+      "epoch": 0.065625,
+      "grad_norm": 5.03747034072876,
+      "learning_rate": 9.970666666666668e-06,
+      "loss": 0.2411,
       "step": 525
     },
     {
+      "epoch": 0.06875,
+      "grad_norm": 3.2012217044830322,
+      "learning_rate": 9.937333333333334e-06,
+      "loss": 0.2389,
       "step": 550
     },
     {
+      "epoch": 0.071875,
+      "grad_norm": 3.7361278533935547,
+      "learning_rate": 9.904e-06,
+      "loss": 0.2217,
       "step": 575
     },
     {
+      "epoch": 0.075,
+      "grad_norm": 4.509885787963867,
+      "learning_rate": 9.870666666666667e-06,
+      "loss": 0.2246,
       "step": 600
     },
     {
+      "epoch": 0.078125,
+      "grad_norm": 3.462961435317993,
+      "learning_rate": 9.837333333333335e-06,
+      "loss": 0.199,
       "step": 625
     },
     {
+      "epoch": 0.08125,
+      "grad_norm": 2.764691114425659,
+      "learning_rate": 9.804000000000001e-06,
+      "loss": 0.2156,
       "step": 650
     },
     {
+      "epoch": 0.084375,
+      "grad_norm": 3.059408187866211,
+      "learning_rate": 9.770666666666668e-06,
+      "loss": 0.212,
       "step": 675
     },
     {
+      "epoch": 0.0875,
+      "grad_norm": 3.952425718307495,
+      "learning_rate": 9.737333333333334e-06,
+      "loss": 0.2123,
       "step": 700
     },
     {
+      "epoch": 0.090625,
+      "grad_norm": 4.892609119415283,
+      "learning_rate": 9.704e-06,
+      "loss": 0.2343,
       "step": 725
     },
     {
+      "epoch": 0.09375,
+      "grad_norm": 4.592615127563477,
+      "learning_rate": 9.670666666666667e-06,
+      "loss": 0.3308,
       "step": 750
     },
     {
+      "epoch": 0.096875,
+      "grad_norm": 4.663967132568359,
+      "learning_rate": 9.637333333333333e-06,
+      "loss": 0.3146,
       "step": 775
     },
     {
+      "epoch": 0.1,
+      "grad_norm": 5.091048717498779,
+      "learning_rate": 9.604000000000002e-06,
+      "loss": 0.3519,
       "step": 800
     },
     {
+      "epoch": 0.103125,
+      "grad_norm": 3.8216071128845215,
+      "learning_rate": 9.570666666666666e-06,
+      "loss": 0.2365,
       "step": 825
     },
     {
+      "epoch": 0.10625,
+      "grad_norm": 3.122516393661499,
+      "learning_rate": 9.537333333333334e-06,
+      "loss": 0.193,
       "step": 850
     },
     {
+      "epoch": 0.109375,
+      "grad_norm": 2.657339096069336,
+      "learning_rate": 9.504e-06,
+      "loss": 0.1759,
       "step": 875
     },
     {
+      "epoch": 0.1125,
+      "grad_norm": 4.554510116577148,
+      "learning_rate": 9.470666666666667e-06,
+      "loss": 0.2387,
       "step": 900
     },
     {
+      "epoch": 0.115625,
+      "grad_norm": 5.045220851898193,
+      "learning_rate": 9.437333333333334e-06,
+      "loss": 0.2845,
       "step": 925
     },
     {
+      "epoch": 0.11875,
+      "grad_norm": 4.260054588317871,
+      "learning_rate": 9.404e-06,
+      "loss": 0.2755,
       "step": 950
     },
     {
+      "epoch": 0.121875,
+      "grad_norm": 5.8209147453308105,
+      "learning_rate": 9.370666666666668e-06,
+      "loss": 0.481,
       "step": 975
     },
     {
+      "epoch": 0.125,
+      "grad_norm": 5.498444557189941,
+      "learning_rate": 9.337333333333335e-06,
+      "loss": 0.3998,
       "step": 1000
     },
     {
+      "epoch": 0.125,
+      "eval_loss": 0.36512792110443115,
+      "eval_runtime": 153.2646,
+      "eval_samples_per_second": 13.728,
+      "eval_steps_per_second": 0.861,
+      "eval_wer": 21.50135552023932,
       "step": 1000
     },
     {
+      "epoch": 0.128125,
+      "grad_norm": 4.732964515686035,
+      "learning_rate": 9.304000000000001e-06,
+      "loss": 0.329,
       "step": 1025
     },
     {
+      "epoch": 0.13125,
+      "grad_norm": 3.3556125164031982,
+      "learning_rate": 9.270666666666667e-06,
+      "loss": 0.2319,
       "step": 1050
     },
     {
+      "epoch": 0.134375,
+      "grad_norm": 2.9708847999572754,
+      "learning_rate": 9.237333333333334e-06,
+      "loss": 0.174,
       "step": 1075
     },
     {
+      "epoch": 0.1375,
+      "grad_norm": 2.841306447982788,
+      "learning_rate": 9.204e-06,
+      "loss": 0.1447,
       "step": 1100
     },
     {
+      "epoch": 0.140625,
+      "grad_norm": 2.7909176349639893,
+      "learning_rate": 9.170666666666668e-06,
+      "loss": 0.1406,
       "step": 1125
     },
     {
+      "epoch": 0.14375,
+      "grad_norm": 3.37842059135437,
+      "learning_rate": 9.137333333333333e-06,
+      "loss": 0.151,
       "step": 1150
     },
     {
+      "epoch": 0.146875,
+      "grad_norm": 3.023977041244507,
+      "learning_rate": 9.104000000000001e-06,
+      "loss": 0.1529,
       "step": 1175
     },
     {
+      "epoch": 0.15,
+      "grad_norm": 3.015974283218384,
+      "learning_rate": 9.070666666666668e-06,
+      "loss": 0.1496,
       "step": 1200
     },
     {
+      "epoch": 0.153125,
+      "grad_norm": 4.30889892578125,
+      "learning_rate": 9.037333333333334e-06,
+      "loss": 0.219,
       "step": 1225
     },
     {
+      "epoch": 0.15625,
+      "grad_norm": 4.160729885101318,
+      "learning_rate": 9.004e-06,
+      "loss": 0.238,
       "step": 1250
     },
     {
+      "epoch": 0.159375,
+      "grad_norm": 4.687659740447998,
+      "learning_rate": 8.970666666666667e-06,
+      "loss": 0.2603,
       "step": 1275
     },
     {
+      "epoch": 0.1625,
+      "grad_norm": 4.577232837677002,
+      "learning_rate": 8.937333333333335e-06,
+      "loss": 0.2666,
       "step": 1300
     },
     {
+      "epoch": 0.165625,
+      "grad_norm": 5.091732501983643,
+      "learning_rate": 8.904e-06,
+      "loss": 0.2337,
       "step": 1325
     },
     {
+      "epoch": 0.16875,
+      "grad_norm": 4.125801086425781,
+      "learning_rate": 8.870666666666668e-06,
+      "loss": 0.2379,
       "step": 1350
     },
     {
+      "epoch": 0.171875,
+      "grad_norm": 5.142183303833008,
+      "learning_rate": 8.837333333333334e-06,
+      "loss": 0.2215,
       "step": 1375
     },
     {
+      "epoch": 0.175,
+      "grad_norm": 4.486277103424072,
+      "learning_rate": 8.804e-06,
+      "loss": 0.2136,
       "step": 1400
     },
     {
+      "epoch": 0.178125,
+      "grad_norm": 3.5466482639312744,
+      "learning_rate": 8.770666666666667e-06,
+      "loss": 0.2214,
       "step": 1425
     },
     {
+      "epoch": 0.18125,
+      "grad_norm": 3.6199097633361816,
+      "learning_rate": 8.737333333333334e-06,
+      "loss": 0.2113,
       "step": 1450
     },
     {
+      "epoch": 0.184375,
+      "grad_norm": 2.559951066970825,
+      "learning_rate": 8.704e-06,
+      "loss": 0.1552,
       "step": 1475
     },
     {
+      "epoch": 0.1875,
+      "grad_norm": 2.9152133464813232,
+      "learning_rate": 8.670666666666666e-06,
+      "loss": 0.1354,
       "step": 1500
     },
     {
+      "epoch": 0.190625,
+      "grad_norm": 2.608732223510742,
+      "learning_rate": 8.637333333333335e-06,
+      "loss": 0.144,
       "step": 1525
     },
     {
+      "epoch": 0.19375,
+      "grad_norm": 4.0043416023254395,
+      "learning_rate": 8.604000000000001e-06,
+      "loss": 0.1367,
       "step": 1550
     },
     {
+      "epoch": 0.196875,
+      "grad_norm": 2.3621206283569336,
+      "learning_rate": 8.570666666666667e-06,
+      "loss": 0.1194,
       "step": 1575
     },
     {
+      "epoch": 0.2,
+      "grad_norm": 2.6970181465148926,
+      "learning_rate": 8.537333333333334e-06,
+      "loss": 0.1283,
       "step": 1600
     },
     {
+      "epoch": 0.203125,
+      "grad_norm": 4.737370014190674,
+      "learning_rate": 8.504000000000002e-06,
+      "loss": 0.1858,
       "step": 1625
     },
     {
+      "epoch": 0.20625,
+      "grad_norm": 3.462738513946533,
+      "learning_rate": 8.470666666666667e-06,
+      "loss": 0.1995,
       "step": 1650
     },
     {
+      "epoch": 0.209375,
+      "grad_norm": 4.608364582061768,
+      "learning_rate": 8.437333333333335e-06,
+      "loss": 0.2028,
       "step": 1675
     },
     {
+      "epoch": 0.2125,
+      "grad_norm": 2.770601987838745,
+      "learning_rate": 8.404000000000001e-06,
+      "loss": 0.1952,
       "step": 1700
     },
     {
+      "epoch": 0.215625,
+      "grad_norm": 3.041656017303467,
+      "learning_rate": 8.370666666666668e-06,
+      "loss": 0.1464,
       "step": 1725
     },
     {
+      "epoch": 0.21875,
+      "grad_norm": 2.988032102584839,
+      "learning_rate": 8.337333333333334e-06,
+      "loss": 0.1424,
       "step": 1750
     },
     {
+      "epoch": 0.221875,
+      "grad_norm": 3.0646026134490967,
+      "learning_rate": 8.304e-06,
+      "loss": 0.1233,
       "step": 1775
     },
     {
+      "epoch": 0.225,
+      "grad_norm": 2.617403268814087,
+      "learning_rate": 8.270666666666667e-06,
+      "loss": 0.1384,
       "step": 1800
     },
     {
+      "epoch": 0.228125,
+      "grad_norm": 2.6170425415039062,
+      "learning_rate": 8.237333333333333e-06,
+      "loss": 0.1208,
       "step": 1825
     },
     {
+      "epoch": 0.23125,
+      "grad_norm": 2.1296098232269287,
+      "learning_rate": 8.204000000000001e-06,
+      "loss": 0.1176,
       "step": 1850
     },
     {
+      "epoch": 0.234375,
+      "grad_norm": 2.767275810241699,
+      "learning_rate": 8.170666666666668e-06,
+      "loss": 0.1189,
       "step": 1875
     },
     {
+      "epoch": 0.2375,
+      "grad_norm": 2.7053661346435547,
+      "learning_rate": 8.137333333333334e-06,
+      "loss": 0.1211,
       "step": 1900
     },
     {
+      "epoch": 0.240625,
+      "grad_norm": 2.281399965286255,
+      "learning_rate": 8.104e-06,
+      "loss": 0.1156,
       "step": 1925
     },
     {
+      "epoch": 0.24375,
+      "grad_norm": 3.7013635635375977,
+      "learning_rate": 8.070666666666667e-06,
+      "loss": 0.1517,
       "step": 1950
     },
     {
+      "epoch": 0.246875,
+      "grad_norm": 3.7125532627105713,
+      "learning_rate": 8.037333333333334e-06,
+      "loss": 0.2002,
       "step": 1975
     },
     {
+      "epoch": 0.25,
+      "grad_norm": 3.8716859817504883,
+      "learning_rate": 8.004e-06,
+      "loss": 0.1975,
       "step": 2000
     },
     {
+      "epoch": 0.25,
+      "eval_loss": 0.2918355464935303,
+      "eval_runtime": 153.1763,
+      "eval_samples_per_second": 13.736,
+      "eval_steps_per_second": 0.862,
+      "eval_wer": 15.873609423202767,
       "step": 2000
     },
     {
+      "epoch": 0.253125,
+      "grad_norm": 2.4911813735961914,
+      "learning_rate": 7.970666666666668e-06,
+      "loss": 0.1648,
       "step": 2025
     },
     {
+      "epoch": 0.25625,
+      "grad_norm": 2.604146718978882,
+      "learning_rate": 7.937333333333333e-06,
+      "loss": 0.1162,
       "step": 2050
     },
     {
+      "epoch": 0.259375,
+      "grad_norm": 2.7352280616760254,
+      "learning_rate": 7.904000000000001e-06,
+      "loss": 0.1135,
       "step": 2075
     },
     {
+      "epoch": 0.2625,
+      "grad_norm": 2.2932169437408447,
+      "learning_rate": 7.870666666666667e-06,
+      "loss": 0.1153,
       "step": 2100
     },
     {
+      "epoch": 0.265625,
+      "grad_norm": 3.1734797954559326,
+      "learning_rate": 7.837333333333334e-06,
+      "loss": 0.1005,
       "step": 2125
     },
     {
+      "epoch": 0.26875,
+      "grad_norm": 2.4353103637695312,
+      "learning_rate": 7.804e-06,
+      "loss": 0.0988,
       "step": 2150
     },
     {
+      "epoch": 0.271875,
+      "grad_norm": 2.8655478954315186,
+      "learning_rate": 7.770666666666668e-06,
+      "loss": 0.1028,
       "step": 2175
     },
     {
+      "epoch": 0.275,
+      "grad_norm": 3.800967216491699,
+      "learning_rate": 7.737333333333335e-06,
+      "loss": 0.1751,
       "step": 2200
     },
     {
+      "epoch": 0.278125,
+      "grad_norm": 4.212419509887695,
+      "learning_rate": 7.704000000000001e-06,
+      "loss": 0.1798,
       "step": 2225
     },
     {
+      "epoch": 0.28125,
+      "grad_norm": 3.5863020420074463,
+      "learning_rate": 7.670666666666668e-06,
+      "loss": 0.199,
       "step": 2250
     },
     {
+      "epoch": 0.284375,
+      "grad_norm": 3.1013996601104736,
+      "learning_rate": 7.637333333333334e-06,
+      "loss": 0.1335,
       "step": 2275
     },
     {
+      "epoch": 0.2875,
+      "grad_norm": 2.2462713718414307,
+      "learning_rate": 7.604e-06,
+      "loss": 0.0976,
       "step": 2300
     },
     {
+      "epoch": 0.290625,
+      "grad_norm": 2.9669203758239746,
+      "learning_rate": 7.570666666666668e-06,
+      "loss": 0.0946,
       "step": 2325
     },
     {
+      "epoch": 0.29375,
+      "grad_norm": 2.645289897918701,
+      "learning_rate": 7.537333333333334e-06,
+      "loss": 0.0935,
       "step": 2350
     },
     {
+      "epoch": 0.296875,
+      "grad_norm": 1.9715274572372437,
+      "learning_rate": 7.5040000000000005e-06,
+      "loss": 0.1045,
       "step": 2375
     },
     {
+      "epoch": 0.3,
+      "grad_norm": 2.1423373222351074,
+      "learning_rate": 7.470666666666667e-06,
+      "loss": 0.0977,
       "step": 2400
     },
     {
+      "epoch": 0.303125,
+      "grad_norm": 2.029958963394165,
+      "learning_rate": 7.437333333333334e-06,
+      "loss": 0.1061,
       "step": 2425
     },
     {
+      "epoch": 0.30625,
+      "grad_norm": 1.972732663154602,
+      "learning_rate": 7.404e-06,
+      "loss": 0.0998,
       "step": 2450
     },
     {
+      "epoch": 0.309375,
+      "grad_norm": 2.2875239849090576,
+      "learning_rate": 7.370666666666667e-06,
+      "loss": 0.1068,
       "step": 2475
     },
     {
+      "epoch": 0.3125,
+      "grad_norm": 3.1778981685638428,
+      "learning_rate": 7.337333333333334e-06,
+      "loss": 0.1168,
       "step": 2500
     },
     {
+      "epoch": 0.315625,
+      "grad_norm": 3.360576868057251,
+      "learning_rate": 7.304000000000001e-06,
+      "loss": 0.1524,
       "step": 2525
     },
     {
+      "epoch": 0.31875,
+      "grad_norm": 3.5467047691345215,
+      "learning_rate": 7.270666666666667e-06,
+      "loss": 0.1483,
       "step": 2550
     },
     {
+      "epoch": 0.321875,
+      "grad_norm": 3.488696575164795,
+      "learning_rate": 7.237333333333334e-06,
+      "loss": 0.1775,
       "step": 2575
     },
     {
+      "epoch": 0.325,
+      "grad_norm": 2.8800296783447266,
+      "learning_rate": 7.204000000000001e-06,
+      "loss": 0.135,
       "step": 2600
     },
     {
+      "epoch": 0.328125,
+      "grad_norm": 3.1020660400390625,
+      "learning_rate": 7.170666666666667e-06,
+      "loss": 0.1108,
       "step": 2625
     },
     {
+      "epoch": 0.33125,
+      "grad_norm": 2.1233720779418945,
+      "learning_rate": 7.137333333333334e-06,
+      "loss": 0.1002,
       "step": 2650
     },
     {
+      "epoch": 0.334375,
+      "grad_norm": 2.393425703048706,
+      "learning_rate": 7.104000000000001e-06,
+      "loss": 0.0941,
       "step": 2675
     },
     {
+      "epoch": 0.3375,
+      "grad_norm": 2.295924186706543,
+      "learning_rate": 7.0706666666666665e-06,
+      "loss": 0.0959,
       "step": 2700
     },
     {
+      "epoch": 0.340625,
+      "grad_norm": 1.8125039339065552,
+      "learning_rate": 7.037333333333334e-06,
+      "loss": 0.1116,
       "step": 2725
     },
     {
+      "epoch": 0.34375,
+      "grad_norm": 3.006834030151367,
+      "learning_rate": 7.004000000000001e-06,
+      "loss": 0.1146,
       "step": 2750
     },
     {
+      "epoch": 0.346875,
+      "grad_norm": 4.171006679534912,
+      "learning_rate": 6.970666666666667e-06,
+      "loss": 0.2029,
       "step": 2775
     },
     {
+      "epoch": 0.35,
+      "grad_norm": 3.68646240234375,
+      "learning_rate": 6.937333333333334e-06,
+      "loss": 0.1913,
       "step": 2800
     },
     {
+      "epoch": 0.353125,
+      "grad_norm": 3.7463300228118896,
+      "learning_rate": 6.904e-06,
+      "loss": 0.16,
       "step": 2825
     },
     {
+      "epoch": 0.35625,
+      "grad_norm": 3.069136381149292,
+      "learning_rate": 6.8706666666666676e-06,
+      "loss": 0.1571,
       "step": 2850
     },
     {
+      "epoch": 0.359375,
+      "grad_norm": 3.17172908782959,
+      "learning_rate": 6.837333333333334e-06,
+      "loss": 0.1608,
       "step": 2875
     },
     {
+      "epoch": 0.3625,
+      "grad_norm": 3.1673102378845215,
+      "learning_rate": 6.804e-06,
+      "loss": 0.1546,
       "step": 2900
     },
     {
+      "epoch": 0.365625,
+      "grad_norm": 2.344193935394287,
+      "learning_rate": 6.770666666666668e-06,
+      "loss": 0.1282,
       "step": 2925
     },
     {
+      "epoch": 0.36875,
+      "grad_norm": 2.5321226119995117,
+      "learning_rate": 6.737333333333333e-06,
+      "loss": 0.0979,
       "step": 2950
     },
     {
+      "epoch": 0.371875,
+      "grad_norm": 2.2652363777160645,
+      "learning_rate": 6.7040000000000005e-06,
+      "loss": 0.1049,
       "step": 2975
     },
     {
+      "epoch": 0.375,
+      "grad_norm": 2.7856993675231934,
+      "learning_rate": 6.670666666666668e-06,
+      "loss": 0.1433,
       "step": 3000
     },
     {
+      "epoch": 0.375,
+      "eval_loss": 0.2720916271209717,
+      "eval_runtime": 151.7576,
+      "eval_samples_per_second": 13.864,
+      "eval_steps_per_second": 0.87,
+      "eval_wer": 13.9010937646069,
       "step": 3000
     },
     {
+      "epoch": 0.378125,
+      "grad_norm": 4.214677810668945,
+      "learning_rate": 6.637333333333333e-06,
+      "loss": 0.1758,
       "step": 3025
     },
     {
+      "epoch": 0.38125,
+      "grad_norm": 4.144543647766113,
+      "learning_rate": 6.604000000000001e-06,
+      "loss": 0.1972,
       "step": 3050
     },
     {
+      "epoch": 0.384375,
+      "grad_norm": 2.1775295734405518,
+      "learning_rate": 6.570666666666667e-06,
+      "loss": 0.1293,
       "step": 3075
     },
     {
+      "epoch": 0.3875,
+      "grad_norm": 2.796152353286743,
+      "learning_rate": 6.537333333333334e-06,
+      "loss": 0.099,
       "step": 3100
     },
     {
+      "epoch": 0.390625,
+      "grad_norm": 2.1920204162597656,
+      "learning_rate": 6.504e-06,
+      "loss": 0.0945,
       "step": 3125
     },
     {
+      "epoch": 0.39375,
+      "grad_norm": 2.8689582347869873,
+      "learning_rate": 6.470666666666667e-06,
+      "loss": 0.1118,
       "step": 3150
     },
     {
+      "epoch": 0.396875,
+      "grad_norm": 3.580993175506592,
+      "learning_rate": 6.4373333333333344e-06,
+      "loss": 0.1732,
       "step": 3175
     },
     {
+      "epoch": 0.4,
+      "grad_norm": 3.9165573120117188,
+      "learning_rate": 6.404e-06,
+      "loss": 0.1581,
       "step": 3200
     },
     {
+      "epoch": 0.403125,
+      "grad_norm": 3.8235292434692383,
+      "learning_rate": 6.370666666666667e-06,
+      "loss": 0.1716,
       "step": 3225
     },
     {
+      "epoch": 0.40625,
+      "grad_norm": 3.21138072013855,
+      "learning_rate": 6.3373333333333345e-06,
+      "loss": 0.1364,
       "step": 3250
     },
     {
+      "epoch": 0.409375,
+      "grad_norm": 3.925539255142212,
+      "learning_rate": 6.304e-06,
+      "loss": 0.1459,
       "step": 3275
     },
     {
+      "epoch": 0.4125,
+      "grad_norm": 3.062764883041382,
+      "learning_rate": 6.270666666666667e-06,
+      "loss": 0.1668,
       "step": 3300
     },
     {
+      "epoch": 0.415625,
+      "grad_norm": 2.8379392623901367,
+      "learning_rate": 6.237333333333334e-06,
+      "loss": 0.1243,
       "step": 3325
     },
     {
+      "epoch": 0.41875,
+      "grad_norm": 2.979661226272583,
+      "learning_rate": 6.204e-06,
+      "loss": 0.0979,
       "step": 3350
     },
     {
+      "epoch": 0.421875,
+      "grad_norm": 2.4838883876800537,
+      "learning_rate": 6.170666666666667e-06,
+      "loss": 0.0848,
       "step": 3375
     },
     {
+      "epoch": 0.425,
+      "grad_norm": 2.3293073177337646,
+      "learning_rate": 6.137333333333334e-06,
+      "loss": 0.0927,
       "step": 3400
     },
     {
+      "epoch": 0.428125,
+      "grad_norm": 3.3497400283813477,
+      "learning_rate": 6.104000000000001e-06,
+      "loss": 0.0976,
       "step": 3425
     },
     {
+      "epoch": 0.43125,
+      "grad_norm": 2.0302255153656006,
+      "learning_rate": 6.070666666666667e-06,
+      "loss": 0.0881,
       "step": 3450
     },
     {
+      "epoch": 0.434375,
+      "grad_norm": 2.112396001815796,
+      "learning_rate": 6.037333333333334e-06,
+      "loss": 0.0828,
       "step": 3475
     },
     {
+      "epoch": 0.4375,
+      "grad_norm": 2.513197183609009,
+      "learning_rate": 6.004000000000001e-06,
+      "loss": 0.0983,
       "step": 3500
     },
     {
+      "epoch": 0.440625,
+      "grad_norm": 2.1429622173309326,
+      "learning_rate": 5.970666666666667e-06,
+      "loss": 0.0929,
       "step": 3525
     },
     {
+      "epoch": 0.44375,
+      "grad_norm": 2.7300236225128174,
+      "learning_rate": 5.937333333333334e-06,
+      "loss": 0.0916,
       "step": 3550
     },
     {
+      "epoch": 0.446875,
+      "grad_norm": 4.011541366577148,
+      "learning_rate": 5.9040000000000006e-06,
+      "loss": 0.1426,
       "step": 3575
     },
     {
+      "epoch": 0.45,
+      "grad_norm": 3.1994545459747314,
+      "learning_rate": 5.870666666666667e-06,
+      "loss": 0.163,
       "step": 3600
     },
     {
+      "epoch": 0.453125,
+      "grad_norm": 2.98388934135437,
+      "learning_rate": 5.837333333333333e-06,
+      "loss": 0.1568,
       "step": 3625
     },
     {
+      "epoch": 0.45625,
+      "grad_norm": 2.4515798091888428,
+      "learning_rate": 5.804000000000001e-06,
+      "loss": 0.0937,
       "step": 3650
     },
     {
+      "epoch": 0.459375,
+      "grad_norm": 2.0767834186553955,
+      "learning_rate": 5.770666666666666e-06,
+      "loss": 0.0861,
       "step": 3675
     },
     {
+      "epoch": 0.4625,
+      "grad_norm": 2.601104974746704,
+      "learning_rate": 5.7373333333333335e-06,
+      "loss": 0.0917,
       "step": 3700
     },
     {
+      "epoch": 0.465625,
+      "grad_norm": 2.593489408493042,
+      "learning_rate": 5.704000000000001e-06,
+      "loss": 0.1022,
       "step": 3725
     },
     {
+      "epoch": 0.46875,
+      "grad_norm": 3.5832834243774414,
+      "learning_rate": 5.670666666666668e-06,
+      "loss": 0.1304,
       "step": 3750
     },
     {
+      "epoch": 0.471875,
+      "grad_norm": 3.4403560161590576,
+      "learning_rate": 5.637333333333334e-06,
+      "loss": 0.1634,
       "step": 3775
     },
     {
+      "epoch": 0.475,
+      "grad_norm": 3.6842737197875977,
+      "learning_rate": 5.604000000000001e-06,
+      "loss": 0.1683,
       "step": 3800
     },
     {
+      "epoch": 0.478125,
+      "grad_norm": 3.8382315635681152,
+      "learning_rate": 5.570666666666667e-06,
+      "loss": 0.1538,
       "step": 3825
     },
     {
+      "epoch": 0.48125,
+      "grad_norm": 4.207257270812988,
+      "learning_rate": 5.537333333333334e-06,
+      "loss": 0.165,
       "step": 3850
     },
     {
+      "epoch": 0.484375,
+      "grad_norm": 2.4130444526672363,
+      "learning_rate": 5.504e-06,
+      "loss": 0.1558,
       "step": 3875
     },
     {
+      "epoch": 0.4875,
+      "grad_norm": 2.3981151580810547,
+      "learning_rate": 5.4706666666666674e-06,
+      "loss": 0.1096,
       "step": 3900
     },
     {
+      "epoch": 0.490625,
+      "grad_norm": 2.2837915420532227,
+      "learning_rate": 5.437333333333333e-06,
+      "loss": 0.0937,
       "step": 3925
     },
     {
+      "epoch": 0.49375,
+      "grad_norm": 2.6647775173187256,
+      "learning_rate": 5.404e-06,
+      "loss": 0.0876,
       "step": 3950
     },
     {
+      "epoch": 0.496875,
+      "grad_norm": 3.7677643299102783,
+      "learning_rate": 5.3706666666666675e-06,
+      "loss": 0.15,
       "step": 3975
     },
     {
+      "epoch": 0.5,
+      "grad_norm": 3.542175769805908,
+      "learning_rate": 5.337333333333333e-06,
+      "loss": 0.1925,
       "step": 4000
     },
     {
+      "epoch": 0.5,
+      "eval_loss": 0.25648659467697144,
+      "eval_runtime": 150.6646,
+      "eval_samples_per_second": 13.965,
+      "eval_steps_per_second": 0.876,
+      "eval_wer": 12.7372160418809,
       "step": 4000
     },
     {
+      "epoch": 0.503125,
+      "grad_norm": 2.5672571659088135,
+      "learning_rate": 5.304e-06,
+      "loss": 0.1434,
       "step": 4025
     },
     {
+      "epoch": 0.50625,
+      "grad_norm": 4.591808319091797,
+      "learning_rate": 5.270666666666668e-06,
+      "loss": 0.2075,
       "step": 4050
     },
     {
+      "epoch": 0.509375,
+      "grad_norm": 3.485185146331787,
+      "learning_rate": 5.237333333333334e-06,
+      "loss": 0.1478,
       "step": 4075
     },
     {
+      "epoch": 0.5125,
+      "grad_norm": 2.5995991230010986,
+      "learning_rate": 5.2040000000000005e-06,
+      "loss": 0.1383,
       "step": 4100
     },
     {
+      "epoch": 0.515625,
+      "grad_norm": 2.4682819843292236,
+      "learning_rate": 5.170666666666667e-06,
+      "loss": 0.0959,
       "step": 4125
     },
     {
+      "epoch": 0.51875,
+      "grad_norm": 2.436518669128418,
+      "learning_rate": 5.137333333333334e-06,
+      "loss": 0.0857,
       "step": 4150
     },
     {
+      "epoch": 0.521875,
+      "grad_norm": 2.0344107151031494,
+      "learning_rate": 5.104e-06,
+      "loss": 0.0862,
       "step": 4175
     },
     {
+      "epoch": 0.525,
+      "grad_norm": 1.6771937608718872,
+      "learning_rate": 5.070666666666667e-06,
+      "loss": 0.0808,
       "step": 4200
     },
     {
+      "epoch": 0.528125,
+      "grad_norm": 1.7831439971923828,
+      "learning_rate": 5.037333333333334e-06,
+      "loss": 0.0872,
       "step": 4225
     },
     {
+      "epoch": 0.53125,
+      "grad_norm": 2.228795051574707,
+      "learning_rate": 5.004e-06,
+      "loss": 0.0832,
       "step": 4250
     },
     {
+      "epoch": 0.534375,
+      "grad_norm": 3.1402647495269775,
+      "learning_rate": 4.970666666666667e-06,
+      "loss": 0.0927,
       "step": 4275
     },
     {
+      "epoch": 0.5375,
+      "grad_norm": 3.662506580352783,
+      "learning_rate": 4.937333333333334e-06,
+      "loss": 0.1477,
       "step": 4300
     },
     {
+      "epoch": 0.540625,
+      "grad_norm": 2.865934371948242,
+      "learning_rate": 4.904000000000001e-06,
+      "loss": 0.1262,
       "step": 4325
     },
     {
+      "epoch": 0.54375,
+      "grad_norm": 3.2233200073242188,
+      "learning_rate": 4.870666666666667e-06,
+      "loss": 0.1329,
       "step": 4350
     },
     {
+      "epoch": 0.546875,
+      "grad_norm": 2.093703269958496,
+      "learning_rate": 4.837333333333334e-06,
+      "loss": 0.0795,
       "step": 4375
     },
     {
+      "epoch": 0.55,
+      "grad_norm": 1.7601807117462158,
+      "learning_rate": 4.804e-06,
+      "loss": 0.0715,
       "step": 4400
     },
     {
+      "epoch": 0.553125,
+      "grad_norm": 2.1606643199920654,
+      "learning_rate": 4.770666666666667e-06,
+      "loss": 0.0797,
       "step": 4425
     },
     {
+      "epoch": 0.55625,
+      "grad_norm": 2.565343141555786,
+      "learning_rate": 4.737333333333334e-06,
+      "loss": 0.0883,
       "step": 4450
     },
     {
+      "epoch": 0.559375,
+      "grad_norm": 2.062619924545288,
+      "learning_rate": 4.704e-06,
+      "loss": 0.0965,
       "step": 4475
     },
     {
+      "epoch": 0.5625,
+      "grad_norm": 2.2219879627227783,
+      "learning_rate": 4.6706666666666675e-06,
+      "loss": 0.0891,
       "step": 4500
     },
     {
+      "epoch": 0.565625,
+      "grad_norm": 2.857029676437378,
+      "learning_rate": 4.637333333333334e-06,
+      "loss": 0.1147,
       "step": 4525
     },
     {
+      "epoch": 0.56875,
+      "grad_norm": 3.090247392654419,
+      "learning_rate": 4.604e-06,
+      "loss": 0.144,
       "step": 4550
     },
     {
+      "epoch": 0.571875,
+      "grad_norm": 3.8906264305114746,
+      "learning_rate": 4.570666666666667e-06,
+      "loss": 0.1451,
       "step": 4575
     },
     {
+      "epoch": 0.575,
+      "grad_norm": 3.7733590602874756,
+      "learning_rate": 4.537333333333334e-06,
+      "loss": 0.1475,
       "step": 4600
     },
     {
+      "epoch": 0.578125,
+      "grad_norm": 3.379163980484009,
+      "learning_rate": 4.504e-06,
+      "loss": 0.1509,
       "step": 4625
     },
     {
+      "epoch": 0.58125,
+      "grad_norm": 3.4210824966430664,
+      "learning_rate": 4.470666666666667e-06,
+      "loss": 0.1444,
       "step": 4650
     },
     {
+      "epoch": 0.584375,
+      "grad_norm": 3.7809910774230957,
+      "learning_rate": 4.437333333333333e-06,
+      "loss": 0.1295,
       "step": 4675
     },
     {
+      "epoch": 0.5875,
+      "grad_norm": 2.537574052810669,
+      "learning_rate": 4.4040000000000005e-06,
+      "loss": 0.1158,
       "step": 4700
     },
     {
+      "epoch": 0.590625,
+      "grad_norm": 3.482285261154175,
+      "learning_rate": 4.370666666666667e-06,
+      "loss": 0.1249,
       "step": 4725
     },
     {
+      "epoch": 0.59375,
+      "grad_norm": 3.0114011764526367,
+      "learning_rate": 4.337333333333334e-06,
+      "loss": 0.1238,
       "step": 4750
     },
     {
+      "epoch": 0.596875,
+      "grad_norm": 2.117215394973755,
+      "learning_rate": 4.304000000000001e-06,
+      "loss": 0.0888,
       "step": 4775
     },
     {
+      "epoch": 0.6,
+      "grad_norm": 2.0158379077911377,
+      "learning_rate": 4.270666666666667e-06,
+      "loss": 0.0972,
       "step": 4800
     },
     {
+      "epoch": 0.603125,
+      "grad_norm": 2.5208640098571777,
+      "learning_rate": 4.2373333333333335e-06,
+      "loss": 0.0793,
       "step": 4825
     },
     {
+      "epoch": 0.60625,
+      "grad_norm": 2.820002555847168,
+      "learning_rate": 4.204e-06,
+      "loss": 0.1035,
       "step": 4850
     },
     {
+      "epoch": 0.609375,
+      "grad_norm": 3.1144282817840576,
+      "learning_rate": 4.170666666666667e-06,
+      "loss": 0.1128,
       "step": 4875
     },
     {
+      "epoch": 0.6125,
+      "grad_norm": 3.1345527172088623,
+      "learning_rate": 4.137333333333334e-06,
+      "loss": 0.1217,
       "step": 4900
     },
     {
+      "epoch": 0.615625,
+      "grad_norm": 2.2702696323394775,
+      "learning_rate": 4.104e-06,
+      "loss": 0.1061,
       "step": 4925
     },
     {
+      "epoch": 0.61875,
+      "grad_norm": 2.714102268218994,
+      "learning_rate": 4.072e-06,
+      "loss": 0.0919,
       "step": 4950
     },
     {
+      "epoch": 0.621875,
+      "grad_norm": 2.448854923248291,
+      "learning_rate": 4.0386666666666666e-06,
+      "loss": 0.0855,
       "step": 4975
     },
     {
+      "epoch": 0.625,
+      "grad_norm": 2.9392127990722656,
+      "learning_rate": 4.005333333333334e-06,
+      "loss": 0.0818,
       "step": 5000
     },
     {
+      "epoch": 0.625,
+      "eval_loss": 0.2562941014766693,
+      "eval_runtime": 160.0125,
+      "eval_samples_per_second": 13.149,
+      "eval_steps_per_second": 0.825,
+      "eval_wer": 11.942600729176405,
       "step": 5000
     },
     {
+      "epoch": 0.628125,
+      "grad_norm": 2.4964210987091064,
+      "learning_rate": 3.972e-06,
+      "loss": 0.1203,
+      "step": 5025
+    },
+    {
+      "epoch": 0.63125,
+      "grad_norm": 3.330078125,
+      "learning_rate": 3.938666666666667e-06,
+      "loss": 0.111,
+      "step": 5050
+    },
+    {
+      "epoch": 0.634375,
+      "grad_norm": 3.6872191429138184,
+      "learning_rate": 3.905333333333334e-06,
+      "loss": 0.164,
+      "step": 5075
+    },
+    {
+      "epoch": 0.6375,
+      "grad_norm": 3.728769063949585,
+      "learning_rate": 3.872e-06,
+      "loss": 0.1515,
+      "step": 5100
+    },
+    {
+      "epoch": 0.640625,
+      "grad_norm": 3.4183156490325928,
+      "learning_rate": 3.838666666666667e-06,
+      "loss": 0.1334,
+      "step": 5125
+    },
+    {
+      "epoch": 0.64375,
+      "grad_norm": 3.4580440521240234,
+      "learning_rate": 3.8053333333333336e-06,
+      "loss": 0.134,
+      "step": 5150
+    },
+    {
+      "epoch": 0.646875,
+      "grad_norm": 2.2719855308532715,
+      "learning_rate": 3.772e-06,
+      "loss": 0.1088,
+      "step": 5175
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 2.3186910152435303,
+      "learning_rate": 3.7386666666666673e-06,
+      "loss": 0.0724,
+      "step": 5200
+    },
+    {
+      "epoch": 0.653125,
+      "grad_norm": 1.8175565004348755,
+      "learning_rate": 3.7053333333333337e-06,
+      "loss": 0.0759,
+      "step": 5225
+    },
+    {
+      "epoch": 0.65625,
+      "grad_norm": 2.0874826908111572,
+      "learning_rate": 3.6720000000000006e-06,
+      "loss": 0.0813,
+      "step": 5250
+    },
+    {
+      "epoch": 0.659375,
+      "grad_norm": 1.9950120449066162,
+      "learning_rate": 3.638666666666667e-06,
+      "loss": 0.0824,
+      "step": 5275
+    },
+    {
+      "epoch": 0.6625,
+      "grad_norm": 2.6349194049835205,
+      "learning_rate": 3.6053333333333334e-06,
+      "loss": 0.0835,
+      "step": 5300
+    },
+    {
+      "epoch": 0.665625,
+      "grad_norm": 2.7667415142059326,
+      "learning_rate": 3.5720000000000003e-06,
+      "loss": 0.0823,
+      "step": 5325
+    },
+    {
+      "epoch": 0.66875,
+      "grad_norm": 3.617748260498047,
+      "learning_rate": 3.538666666666667e-06,
+      "loss": 0.1077,
+      "step": 5350
+    },
+    {
+      "epoch": 0.671875,
+      "grad_norm": 3.2603073120117188,
+      "learning_rate": 3.5053333333333335e-06,
+      "loss": 0.1268,
+      "step": 5375
+    },
+    {
+      "epoch": 0.675,
+      "grad_norm": 2.9681355953216553,
+      "learning_rate": 3.4720000000000004e-06,
+      "loss": 0.1206,
+      "step": 5400
+    },
+    {
+      "epoch": 0.678125,
+      "grad_norm": 4.156548500061035,
+      "learning_rate": 3.438666666666667e-06,
+      "loss": 0.1279,
+      "step": 5425
+    },
+    {
+      "epoch": 0.68125,
+      "grad_norm": 3.2013888359069824,
+      "learning_rate": 3.4053333333333337e-06,
+      "loss": 0.1177,
+      "step": 5450
+    },
+    {
+      "epoch": 0.684375,
+      "grad_norm": 3.299403190612793,
+      "learning_rate": 3.372e-06,
+      "loss": 0.0946,
+      "step": 5475
+    },
+    {
+      "epoch": 0.6875,
+      "grad_norm": 2.39630389213562,
+      "learning_rate": 3.338666666666667e-06,
+      "loss": 0.0944,
+      "step": 5500
+    },
+    {
+      "epoch": 0.690625,
+      "grad_norm": 3.7624928951263428,
+      "learning_rate": 3.3053333333333338e-06,
+      "loss": 0.1149,
+      "step": 5525
+    },
+    {
+      "epoch": 0.69375,
+      "grad_norm": 3.3170886039733887,
+      "learning_rate": 3.272e-06,
+      "loss": 0.1373,
+      "step": 5550
+    },
+    {
+      "epoch": 0.696875,
+      "grad_norm": 2.2296531200408936,
+      "learning_rate": 3.238666666666667e-06,
+      "loss": 0.1056,
+      "step": 5575
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 1.8995999097824097,
+      "learning_rate": 3.2053333333333334e-06,
+      "loss": 0.0724,
+      "step": 5600
+    },
+    {
+      "epoch": 0.703125,
+      "grad_norm": 2.3782520294189453,
+      "learning_rate": 3.172e-06,
+      "loss": 0.0604,
+      "step": 5625
+    },
+    {
+      "epoch": 0.70625,
+      "grad_norm": 2.2558810710906982,
+      "learning_rate": 3.138666666666667e-06,
+      "loss": 0.0581,
+      "step": 5650
+    },
+    {
+      "epoch": 0.709375,
+      "grad_norm": 2.4040448665618896,
+      "learning_rate": 3.1053333333333336e-06,
+      "loss": 0.0713,
+      "step": 5675
+    },
+    {
+      "epoch": 0.7125,
+      "grad_norm": 2.5696732997894287,
+      "learning_rate": 3.072e-06,
+      "loss": 0.0773,
+      "step": 5700
+    },
+    {
+      "epoch": 0.715625,
+      "grad_norm": 2.237166404724121,
+      "learning_rate": 3.038666666666667e-06,
+      "loss": 0.0765,
+      "step": 5725
+    },
+    {
+      "epoch": 0.71875,
+      "grad_norm": 1.8783671855926514,
+      "learning_rate": 3.0053333333333332e-06,
+      "loss": 0.0779,
+      "step": 5750
+    },
+    {
+      "epoch": 0.721875,
+      "grad_norm": 2.096334457397461,
+      "learning_rate": 2.9720000000000005e-06,
+      "loss": 0.0751,
+      "step": 5775
+    },
+    {
+      "epoch": 0.725,
+      "grad_norm": 2.0362164974212646,
+      "learning_rate": 2.938666666666667e-06,
+      "loss": 0.0711,
+      "step": 5800
+    },
+    {
+      "epoch": 0.728125,
+      "grad_norm": 1.7136311531066895,
+      "learning_rate": 2.9053333333333334e-06,
+      "loss": 0.0635,
+      "step": 5825
+    },
+    {
+      "epoch": 0.73125,
+      "grad_norm": 2.754848003387451,
+      "learning_rate": 2.872e-06,
+      "loss": 0.0698,
+      "step": 5850
+    },
+    {
+      "epoch": 0.734375,
+      "grad_norm": 2.058065176010132,
+      "learning_rate": 2.8386666666666666e-06,
+      "loss": 0.0741,
+      "step": 5875
+    },
+    {
+      "epoch": 0.7375,
+      "grad_norm": 3.0389583110809326,
+      "learning_rate": 2.805333333333334e-06,
+      "loss": 0.0938,
+      "step": 5900
+    },
+    {
+      "epoch": 0.740625,
+      "grad_norm": 3.4811720848083496,
+      "learning_rate": 2.7720000000000003e-06,
+      "loss": 0.1387,
+      "step": 5925
+    },
+    {
+      "epoch": 0.74375,
+      "grad_norm": 3.2388477325439453,
+      "learning_rate": 2.7386666666666667e-06,
+      "loss": 0.1283,
+      "step": 5950
+    },
+    {
+      "epoch": 0.746875,
+      "grad_norm": 3.083925247192383,
+      "learning_rate": 2.7053333333333336e-06,
+      "loss": 0.1073,
+      "step": 5975
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 2.6847918033599854,
+      "learning_rate": 2.672e-06,
+      "loss": 0.1038,
+      "step": 6000
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 0.23902159929275513,
+      "eval_runtime": 158.0693,
+      "eval_samples_per_second": 13.311,
+      "eval_steps_per_second": 0.835,
+      "eval_wer": 11.07319809292325,
+      "step": 6000
+    },
+    {
+      "epoch": 0.753125,
+      "grad_norm": 2.7315189838409424,
+      "learning_rate": 2.6386666666666673e-06,
+      "loss": 0.0987,
+      "step": 6025
+    },
+    {
+      "epoch": 0.75625,
+      "grad_norm": 2.3389735221862793,
+      "learning_rate": 2.6053333333333337e-06,
+      "loss": 0.0858,
+      "step": 6050
+    },
+    {
+      "epoch": 0.759375,
+      "grad_norm": 1.982534646987915,
+      "learning_rate": 2.572e-06,
+      "loss": 0.0764,
+      "step": 6075
+    },
+    {
+      "epoch": 0.7625,
+      "grad_norm": 1.9040074348449707,
+      "learning_rate": 2.538666666666667e-06,
+      "loss": 0.0731,
+      "step": 6100
+    },
+    {
+      "epoch": 0.765625,
+      "grad_norm": 2.654710054397583,
+      "learning_rate": 2.5053333333333334e-06,
+      "loss": 0.0758,
+      "step": 6125
+    },
+    {
+      "epoch": 0.76875,
+      "grad_norm": 2.6400296688079834,
+      "learning_rate": 2.4720000000000002e-06,
+      "loss": 0.0824,
+      "step": 6150
+    },
+    {
+      "epoch": 0.771875,
+      "grad_norm": 7.269197463989258,
+      "learning_rate": 2.438666666666667e-06,
+      "loss": 0.0822,
+      "step": 6175
+    },
+    {
+      "epoch": 0.775,
+      "grad_norm": 2.363656520843506,
+      "learning_rate": 2.4053333333333335e-06,
+      "loss": 0.0818,
+      "step": 6200
+    },
+    {
+      "epoch": 0.778125,
+      "grad_norm": 2.4660115242004395,
+      "learning_rate": 2.3720000000000003e-06,
+      "loss": 0.0768,
+      "step": 6225
+    },
+    {
+      "epoch": 0.78125,
+      "grad_norm": 3.3116371631622314,
+      "learning_rate": 2.3386666666666668e-06,
+      "loss": 0.0783,
+      "step": 6250
+    },
+    {
+      "epoch": 0.784375,
+      "grad_norm": 2.595853090286255,
+      "learning_rate": 2.3053333333333336e-06,
+      "loss": 0.0899,
+      "step": 6275
+    },
+    {
+      "epoch": 0.7875,
+      "grad_norm": 2.709597587585449,
+      "learning_rate": 2.2720000000000004e-06,
+      "loss": 0.0953,
+      "step": 6300
+    },
+    {
+      "epoch": 0.790625,
+      "grad_norm": 2.4446637630462646,
+      "learning_rate": 2.238666666666667e-06,
+      "loss": 0.1249,
+      "step": 6325
+    },
+    {
+      "epoch": 0.79375,
+      "grad_norm": 3.4412341117858887,
+      "learning_rate": 2.2053333333333333e-06,
+      "loss": 0.1171,
+      "step": 6350
+    },
+    {
+      "epoch": 0.796875,
+      "grad_norm": 2.2719008922576904,
+      "learning_rate": 2.172e-06,
+      "loss": 0.1065,
+      "step": 6375
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.9873290061950684,
+      "learning_rate": 2.138666666666667e-06,
+      "loss": 0.0872,
+      "step": 6400
+    },
+    {
+      "epoch": 0.803125,
+      "grad_norm": 2.487403392791748,
+      "learning_rate": 2.1053333333333334e-06,
+      "loss": 0.0765,
+      "step": 6425
+    },
+    {
+      "epoch": 0.80625,
+      "grad_norm": 2.4424736499786377,
+      "learning_rate": 2.0720000000000002e-06,
+      "loss": 0.0736,
+      "step": 6450
+    },
+    {
+      "epoch": 0.809375,
+      "grad_norm": 3.1507577896118164,
+      "learning_rate": 2.0386666666666667e-06,
+      "loss": 0.1064,
+      "step": 6475
+    },
+    {
+      "epoch": 0.8125,
+      "grad_norm": 2.6285648345947266,
+      "learning_rate": 2.0053333333333335e-06,
+      "loss": 0.0993,
+      "step": 6500
+    },
+    {
+      "epoch": 0.815625,
+      "grad_norm": 4.1934967041015625,
+      "learning_rate": 1.972e-06,
+      "loss": 0.1299,
+      "step": 6525
+    },
+    {
+      "epoch": 0.81875,
+      "grad_norm": 3.031852960586548,
+      "learning_rate": 1.9386666666666668e-06,
+      "loss": 0.1195,
+      "step": 6550
+    },
+    {
+      "epoch": 0.821875,
+      "grad_norm": 2.9288837909698486,
+      "learning_rate": 1.9053333333333334e-06,
+      "loss": 0.1197,
+      "step": 6575
+    },
+    {
+      "epoch": 0.825,
+      "grad_norm": 2.890054225921631,
+      "learning_rate": 1.8720000000000002e-06,
+      "loss": 0.1127,
+      "step": 6600
+    },
+    {
+      "epoch": 0.828125,
+      "grad_norm": 3.130406618118286,
+      "learning_rate": 1.8386666666666669e-06,
+      "loss": 0.1155,
+      "step": 6625
+    },
+    {
+      "epoch": 0.83125,
+      "grad_norm": 2.7169485092163086,
+      "learning_rate": 1.8053333333333333e-06,
+      "loss": 0.1291,
+      "step": 6650
+    },
+    {
+      "epoch": 0.834375,
+      "grad_norm": 2.7390034198760986,
+      "learning_rate": 1.7720000000000001e-06,
+      "loss": 0.1097,
+      "step": 6675
+    },
+    {
+      "epoch": 0.8375,
+      "grad_norm": 2.161604166030884,
+      "learning_rate": 1.7386666666666668e-06,
+      "loss": 0.1022,
+      "step": 6700
+    },
+    {
+      "epoch": 0.840625,
+      "grad_norm": 2.210451126098633,
+      "learning_rate": 1.7053333333333336e-06,
+      "loss": 0.0779,
+      "step": 6725
+    },
+    {
+      "epoch": 0.84375,
+      "grad_norm": 2.426438808441162,
+      "learning_rate": 1.672e-06,
+      "loss": 0.0728,
+      "step": 6750
+    },
+    {
+      "epoch": 0.846875,
+      "grad_norm": 2.8744237422943115,
+      "learning_rate": 1.6386666666666667e-06,
+      "loss": 0.0859,
+      "step": 6775
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 2.8165483474731445,
+      "learning_rate": 1.6053333333333335e-06,
+      "loss": 0.1496,
+      "step": 6800
+    },
+    {
+      "epoch": 0.853125,
+      "grad_norm": 4.0077738761901855,
+      "learning_rate": 1.5720000000000002e-06,
+      "loss": 0.1343,
+      "step": 6825
+    },
+    {
+      "epoch": 0.85625,
+      "grad_norm": 3.8011586666107178,
+      "learning_rate": 1.538666666666667e-06,
+      "loss": 0.1397,
+      "step": 6850
+    },
+    {
+      "epoch": 0.859375,
+      "grad_norm": 2.7379047870635986,
+      "learning_rate": 1.5053333333333334e-06,
+      "loss": 0.1262,
+      "step": 6875
+    },
+    {
+      "epoch": 0.8625,
+      "grad_norm": 3.250950574874878,
+      "learning_rate": 1.472e-06,
+      "loss": 0.1188,
+      "step": 6900
+    },
+    {
+      "epoch": 0.865625,
+      "grad_norm": 2.782945156097412,
+      "learning_rate": 1.438666666666667e-06,
+      "loss": 0.1103,
+      "step": 6925
+    },
+    {
+      "epoch": 0.86875,
+      "grad_norm": 3.08154034614563,
+      "learning_rate": 1.4053333333333335e-06,
+      "loss": 0.1147,
+      "step": 6950
+    },
+    {
+      "epoch": 0.871875,
+      "grad_norm": 3.5768070220947266,
+      "learning_rate": 1.372e-06,
+      "loss": 0.1332,
+      "step": 6975
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 3.155341863632202,
+      "learning_rate": 1.3386666666666668e-06,
+      "loss": 0.1282,
+      "step": 7000
+    },
+    {
+      "epoch": 0.875,
+      "eval_loss": 0.23438745737075806,
+      "eval_runtime": 154.8314,
+      "eval_samples_per_second": 13.589,
+      "eval_steps_per_second": 0.853,
+      "eval_wer": 11.391044218005048,
+      "step": 7000
+    },
+    {
+      "epoch": 0.878125,
+      "grad_norm": 11.062019348144531,
+      "learning_rate": 1.308e-06,
+      "loss": 0.2406,
+      "step": 7025
+    },
+    {
+      "epoch": 0.88125,
+      "grad_norm": 4.648179531097412,
+      "learning_rate": 1.2746666666666669e-06,
+      "loss": 0.3469,
+      "step": 7050
+    },
+    {
+      "epoch": 0.884375,
+      "grad_norm": 4.388245105743408,
+      "learning_rate": 1.2413333333333335e-06,
+      "loss": 0.3421,
+      "step": 7075
+    },
+    {
+      "epoch": 0.8875,
+      "grad_norm": 4.806427478790283,
+      "learning_rate": 1.2080000000000001e-06,
+      "loss": 0.2847,
+      "step": 7100
+    },
+    {
+      "epoch": 0.890625,
+      "grad_norm": 3.0818049907684326,
+      "learning_rate": 1.1746666666666668e-06,
+      "loss": 0.1671,
+      "step": 7125
+    },
+    {
+      "epoch": 0.89375,
+      "grad_norm": 4.117819309234619,
+      "learning_rate": 1.1413333333333334e-06,
+      "loss": 0.1313,
+      "step": 7150
+    },
+    {
+      "epoch": 0.896875,
+      "grad_norm": 2.8558835983276367,
+      "learning_rate": 1.108e-06,
+      "loss": 0.1177,
+      "step": 7175
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 3.0425021648406982,
+      "learning_rate": 1.0746666666666669e-06,
+      "loss": 0.0911,
+      "step": 7200
+    },
+    {
+      "epoch": 0.903125,
+      "grad_norm": 2.6587588787078857,
+      "learning_rate": 1.0413333333333333e-06,
+      "loss": 0.0898,
+      "step": 7225
+    },
+    {
+      "epoch": 0.90625,
+      "grad_norm": 1.7572664022445679,
+      "learning_rate": 1.0080000000000001e-06,
+      "loss": 0.0922,
+      "step": 7250
+    },
+    {
+      "epoch": 0.909375,
+      "grad_norm": 2.00393009185791,
+      "learning_rate": 9.746666666666668e-07,
+      "loss": 0.0753,
+      "step": 7275
+    },
+    {
+      "epoch": 0.9125,
+      "grad_norm": 1.845981478691101,
+      "learning_rate": 9.413333333333334e-07,
+      "loss": 0.0628,
+      "step": 7300
+    },
+    {
+      "epoch": 0.915625,
+      "grad_norm": 2.008112907409668,
+      "learning_rate": 9.080000000000001e-07,
+      "loss": 0.0696,
+      "step": 7325
+    },
+    {
+      "epoch": 0.91875,
+      "grad_norm": 2.837357759475708,
+      "learning_rate": 8.746666666666668e-07,
+      "loss": 0.0897,
+      "step": 7350
+    },
+    {
+      "epoch": 0.921875,
+      "grad_norm": 2.4842417240142822,
+      "learning_rate": 8.413333333333334e-07,
+      "loss": 0.1227,
+      "step": 7375
+    },
+    {
+      "epoch": 0.925,
+      "grad_norm": 2.7866716384887695,
+      "learning_rate": 8.08e-07,
+      "loss": 0.1012,
+      "step": 7400
+    },
+    {
+      "epoch": 0.928125,
+      "grad_norm": 2.1826930046081543,
+      "learning_rate": 7.746666666666668e-07,
+      "loss": 0.1141,
+      "step": 7425
+    },
+    {
+      "epoch": 0.93125,
+      "grad_norm": 2.014090061187744,
+      "learning_rate": 7.413333333333333e-07,
+      "loss": 0.0754,
+      "step": 7450
+    },
+    {
+      "epoch": 0.934375,
+      "grad_norm": 2.1539175510406494,
+      "learning_rate": 7.08e-07,
+      "loss": 0.0736,
+      "step": 7475
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 2.712541341781616,
+      "learning_rate": 6.746666666666667e-07,
+      "loss": 0.0684,
+      "step": 7500
+    },
+    {
+      "epoch": 0.940625,
+      "grad_norm": 3.281242847442627,
+      "learning_rate": 6.413333333333334e-07,
+      "loss": 0.1414,
+      "step": 7525
+    },
+    {
+      "epoch": 0.94375,
+      "grad_norm": 4.088025093078613,
+      "learning_rate": 6.08e-07,
+      "loss": 0.1895,
+      "step": 7550
+    },
+    {
+      "epoch": 0.946875,
+      "grad_norm": 4.144560813903809,
+      "learning_rate": 5.746666666666667e-07,
+      "loss": 0.222,
+      "step": 7575
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 1.8468823432922363,
+      "learning_rate": 5.413333333333334e-07,
+      "loss": 0.1349,
+      "step": 7600
+    },
+    {
+      "epoch": 0.953125,
+      "grad_norm": 2.5354621410369873,
+      "learning_rate": 5.08e-07,
+      "loss": 0.0872,
+      "step": 7625
+    },
+    {
+      "epoch": 0.95625,
+      "grad_norm": 1.83882737159729,
+      "learning_rate": 4.746666666666667e-07,
+      "loss": 0.0725,
+      "step": 7650
+    },
+    {
+      "epoch": 0.959375,
+      "grad_norm": 3.42556095123291,
+      "learning_rate": 4.413333333333333e-07,
+      "loss": 0.0988,
+      "step": 7675
+    },
+    {
+      "epoch": 0.9625,
+      "grad_norm": 2.682558059692383,
+      "learning_rate": 4.0800000000000005e-07,
+      "loss": 0.1166,
+      "step": 7700
+    },
+    {
+      "epoch": 0.965625,
+      "grad_norm": 3.2471797466278076,
+      "learning_rate": 3.7466666666666674e-07,
+      "loss": 0.1257,
+      "step": 7725
+    },
+    {
+      "epoch": 0.96875,
+      "grad_norm": 2.4202020168304443,
+      "learning_rate": 3.4133333333333337e-07,
+      "loss": 0.1114,
+      "step": 7750
+    },
+    {
+      "epoch": 0.971875,
+      "grad_norm": 2.8282711505889893,
+      "learning_rate": 3.0800000000000006e-07,
+      "loss": 0.0811,
+      "step": 7775
+    },
+    {
+      "epoch": 0.975,
+      "grad_norm": 4.20676326751709,
+      "learning_rate": 2.746666666666667e-07,
+      "loss": 0.104,
+      "step": 7800
+    },
+    {
+      "epoch": 0.978125,
+      "grad_norm": 4.955998420715332,
+      "learning_rate": 2.413333333333333e-07,
+      "loss": 0.2773,
+      "step": 7825
+    },
+    {
+      "epoch": 0.98125,
+      "grad_norm": 2.0168468952178955,
+      "learning_rate": 2.08e-07,
+      "loss": 0.1105,
+      "step": 7850
+    },
+    {
+      "epoch": 0.984375,
+      "grad_norm": 1.6335862874984741,
+      "learning_rate": 1.7466666666666667e-07,
+      "loss": 0.0808,
+      "step": 7875
+    },
+    {
+      "epoch": 0.9875,
+      "grad_norm": 2.269954204559326,
+      "learning_rate": 1.4133333333333333e-07,
+      "loss": 0.0786,
+      "step": 7900
+    },
+    {
+      "epoch": 0.990625,
+      "grad_norm": 2.0813560485839844,
+      "learning_rate": 1.0800000000000001e-07,
+      "loss": 0.0801,
+      "step": 7925
+    },
+    {
+      "epoch": 0.99375,
+      "grad_norm": 1.6093230247497559,
+      "learning_rate": 7.466666666666667e-08,
+      "loss": 0.0687,
+      "step": 7950
+    },
+    {
+      "epoch": 0.996875,
+      "grad_norm": 1.730695366859436,
+      "learning_rate": 4.133333333333334e-08,
+      "loss": 0.0814,
+      "step": 7975
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 3.418311595916748,
+      "learning_rate": 8e-09,
+      "loss": 0.0959,
+      "step": 8000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.1835634410381317,
+      "eval_runtime": 154.4338,
+      "eval_samples_per_second": 13.624,
+      "eval_steps_per_second": 0.855,
+      "eval_wer": 10.886229784051602,
+      "step": 8000
+    },
+    {
+      "epoch": 1.0,
+      "step": 8000,
+      "total_flos": 7.387786248192e+19,
+      "train_loss": 0.17036041705310345,
+      "train_runtime": 11036.9074,
+      "train_samples_per_second": 23.195,
+      "train_steps_per_second": 0.725
     }
   ],
+  "logging_steps": 25,
+  "max_steps": 8000,
+  "num_input_tokens_seen": 0,
   "num_train_epochs": 9223372036854775807,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.387786248192e+19,
+  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
 }

wandb/run-20250212_152709-lejyafmi/files/output.log CHANGED Viewed

@@ -1612,3 +1612,171 @@ It seems you are trying to upload a large folder at once. This might take some t
 /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
   warnings.warn(
 run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.62M/4.62M [00:01<00:00, 3.10MB/s]

 /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
   warnings.warn(
 run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.62M/4.62M [00:01<00:00, 3.10MB/s]
+***** train metrics *****
+  epoch                    =           1.0
+  total_flos               = 68804121093GF
+  train_loss               =        0.1704
+  train_runtime            =    3:03:56.90
+  train_samples_per_second =        23.195
+  train_steps_per_second   =         0.725
+02/12/2025 18:34:14 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:4176] 2025-02-12 18:34:14,390 >>
+***** Running Evaluation *****
+[INFO|trainer.py:4180] 2025-02-12 18:34:14,390 >>   Num examples: Unknown
+[INFO|trainer.py:4181] 2025-02-12 18:34:14,390 >>   Batch size = 16
+[INFO|trainer_utils.py:837] 2025-02-12 18:34:21,770 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:21,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:23,093 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:24,451 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:25,719 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:26,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:28,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:29,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:30,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:31,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:32,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:33,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:34,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:35,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:36,700 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:37,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:38,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:39,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:40,537 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:41,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:42,380 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:43,354 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:44,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:45,314 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:46,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:47,281 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:48,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:49,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:50,458 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:51,370 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:52,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:53,396 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:54,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:55,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:56,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:57,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:58,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:34:59,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:00,681 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:01,717 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:03,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:04,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:05,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:06,930 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:07,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:08,880 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:09,968 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:10,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:11,921 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:14,074 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:15,109 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:16,148 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:17,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:18,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:19,129 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:20,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:21,190 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:22,160 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:23,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:24,153 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:25,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:26,238 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:27,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:28,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:29,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:30,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:31,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:32,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:33,287 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:34,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:35,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:36,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:37,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:38,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:39,530 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:40,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:41,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:42,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:43,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:44,509 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:45,496 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:46,461 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:47,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:48,535 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:49,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:50,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:51,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:52,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:53,784 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:54,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:55,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:56,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:57,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:58,926 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:00,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:01,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:02,083 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:03,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:04,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:05,141 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:06,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:07,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:08,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:09,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:10,412 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:11,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:12,466 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:13,562 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:14,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:15,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:16,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:17,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:18,568 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:19,501 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:20,504 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:21,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:22,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:23,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:24,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:25,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:26,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:27,543 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:28,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:29,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:30,585 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:31,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:32,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:33,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:34,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:35,552 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:36,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:37,437 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+***** eval metrics *****
+  epoch                   =        1.0
+  eval_loss               =     0.1836
+  eval_runtime            = 0:02:31.38
+  eval_samples_per_second =     13.899
+  eval_steps_per_second   =      0.872
+  eval_wer                =    10.8862
+[INFO|trainer.py:3860] 2025-02-12 18:36:45,773 >> Saving model checkpoint to ./
+[INFO|configuration_utils.py:423] 2025-02-12 18:36:45,774 >> Configuration saved in ./config.json
+[INFO|configuration_utils.py:906] 2025-02-12 18:36:45,775 >> Configuration saved in ./generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-12 18:36:47,949 >> Model weights saved in ./model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-12 18:36:47,950 >> Feature extractor saved in ./preprocessor_config.json
+It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+02/12/2025 18:36:51 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+run-lejyafmi.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69M/4.69M [00:01<00:00, 3.23MB/s]

wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d45ae7708451f569a26149093624dc7494943def519a728a6ef4093ad80dd382
-size 4620288

 version https://git-lfs.github.com/spec/v1
+oid sha256:218ae98ab28234be327e4ea9293f7b5d13580cf3d80509614063d5a55716991b
+size 4685824