xezpeleta committed on
Commit
a299f4c
·
verified ·
1 Parent(s): 46060cf

End of training

Browse files
README.md CHANGED
@@ -3,20 +3,33 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: openai/whisper-small
5
  tags:
 
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - wer
9
  model-index:
10
- - name: openai/whisper-small
11
- results: []
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # openai/whisper-small
18
 
19
- This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.1836
22
  - Wer: 10.8862
 
3
  license: apache-2.0
4
  base_model: openai/whisper-small
5
  tags:
6
+ - whisper-event
7
  - generated_from_trainer
8
+ datasets:
9
+ - asierhv/composite_corpus_eu_v2.1
10
  metrics:
11
  - wer
12
  model-index:
13
+ - name: Whisper Small Basque
14
+ results:
15
+ - task:
16
+ name: Automatic Speech Recognition
17
+ type: automatic-speech-recognition
18
+ dataset:
19
+ name: asierhv/composite_corpus_eu_v2.1
20
+ type: asierhv/composite_corpus_eu_v2.1
21
+ metrics:
22
+ - name: Wer
23
+ type: wer
24
+ value: 10.886229784051602
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
  should probably proofread and complete it, then remove this comment. -->
29
 
30
+ # Whisper Small Basque
31
 
32
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the asierhv/composite_corpus_eu_v2.1 dataset.
33
  It achieves the following results on the evaluation set:
34
  - Loss: 0.1836
35
  - Wer: 10.8862
all_results.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
- "epoch": 6.06,
3
- "eval_loss": 0.19964517652988434,
4
- "eval_runtime": 1715.2587,
5
- "eval_samples_per_second": 7.296,
6
- "eval_steps_per_second": 0.456,
7
- "eval_wer": 12.012786552211754,
8
- "train_loss": 0.15837200704216958,
9
- "train_runtime": 29005.3522,
10
- "train_samples_per_second": 5.516,
11
- "train_steps_per_second": 0.172
 
12
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.1835634410381317,
4
+ "eval_runtime": 151.3822,
5
+ "eval_samples_per_second": 13.899,
6
+ "eval_steps_per_second": 0.872,
7
+ "eval_wer": 10.886229784051602,
8
+ "total_flos": 7.387786248192e+19,
9
+ "train_loss": 0.17036041705310345,
10
+ "train_runtime": 11036.9074,
11
+ "train_samples_per_second": 23.195,
12
+ "train_steps_per_second": 0.725
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.06,
3
- "eval_loss": 0.19964517652988434,
4
- "eval_runtime": 1715.2587,
5
- "eval_samples_per_second": 7.296,
6
- "eval_steps_per_second": 0.456,
7
- "eval_wer": 12.012786552211754
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.1835634410381317,
4
+ "eval_runtime": 151.3822,
5
+ "eval_samples_per_second": 13.899,
6
+ "eval_steps_per_second": 0.872,
7
+ "eval_wer": 10.886229784051602
8
  }
train_results.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
- "epoch": 6.06,
3
- "train_loss": 0.15837200704216958,
4
- "train_runtime": 29005.3522,
5
- "train_samples_per_second": 5.516,
6
- "train_steps_per_second": 0.172
 
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "total_flos": 7.387786248192e+19,
4
+ "train_loss": 0.17036041705310345,
5
+ "train_runtime": 11036.9074,
6
+ "train_samples_per_second": 23.195,
7
+ "train_steps_per_second": 0.725
8
  }
trainer_state.json CHANGED
@@ -1,1270 +1,2354 @@
1
  {
2
- "best_metric": 12.012786552211754,
3
- "best_model_checkpoint": "./checkpoint-5000",
4
- "epoch": 6.0604,
5
- "global_step": 5000,
 
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
 
12
  "learning_rate": 4.4e-07,
13
- "loss": 1.6183,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 0.01,
 
18
  "learning_rate": 9.400000000000001e-07,
19
- "loss": 1.4856,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 0.01,
 
24
  "learning_rate": 1.44e-06,
25
- "loss": 1.1898,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 0.02,
 
30
  "learning_rate": 1.94e-06,
31
- "loss": 0.9354,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 0.03,
 
36
  "learning_rate": 2.4400000000000004e-06,
37
- "loss": 0.8172,
38
  "step": 125
39
  },
40
  {
41
- "epoch": 0.03,
 
42
  "learning_rate": 2.9400000000000002e-06,
43
- "loss": 0.7386,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 0.04,
 
48
  "learning_rate": 3.44e-06,
49
- "loss": 0.6768,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 0.04,
 
54
  "learning_rate": 3.94e-06,
55
- "loss": 0.6069,
56
  "step": 200
57
  },
58
  {
59
- "epoch": 0.04,
 
60
  "learning_rate": 4.440000000000001e-06,
61
- "loss": 0.5739,
62
  "step": 225
63
  },
64
  {
65
- "epoch": 0.05,
 
66
  "learning_rate": 4.94e-06,
67
- "loss": 0.5269,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 0.06,
 
72
  "learning_rate": 5.4400000000000004e-06,
73
- "loss": 0.5011,
74
  "step": 275
75
  },
76
  {
77
- "epoch": 0.06,
 
78
  "learning_rate": 5.94e-06,
79
- "loss": 0.4842,
80
  "step": 300
81
  },
82
  {
83
- "epoch": 0.07,
 
84
  "learning_rate": 6.440000000000001e-06,
85
- "loss": 0.4698,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 0.07,
 
90
  "learning_rate": 6.9400000000000005e-06,
91
- "loss": 0.446,
92
  "step": 350
93
  },
94
  {
95
- "epoch": 0.07,
 
96
  "learning_rate": 7.440000000000001e-06,
97
- "loss": 0.4378,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 0.08,
 
102
  "learning_rate": 7.94e-06,
103
- "loss": 0.3966,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 0.09,
 
108
  "learning_rate": 8.44e-06,
109
- "loss": 0.3916,
110
  "step": 425
111
  },
112
  {
113
- "epoch": 0.09,
 
114
  "learning_rate": 8.94e-06,
115
- "loss": 0.3746,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 0.1,
 
120
  "learning_rate": 9.440000000000001e-06,
121
- "loss": 0.3372,
122
  "step": 475
123
  },
124
  {
125
- "epoch": 0.1,
 
126
  "learning_rate": 9.940000000000001e-06,
127
- "loss": 0.329,
128
  "step": 500
129
  },
130
  {
131
- "epoch": 0.1,
132
- "learning_rate": 9.951111111111111e-06,
133
- "loss": 0.3364,
 
134
  "step": 525
135
  },
136
  {
137
- "epoch": 0.11,
138
- "learning_rate": 9.895555555555557e-06,
139
- "loss": 0.3074,
 
140
  "step": 550
141
  },
142
  {
143
- "epoch": 0.12,
144
- "learning_rate": 9.84e-06,
145
- "loss": 0.3134,
 
146
  "step": 575
147
  },
148
  {
149
- "epoch": 0.12,
150
- "learning_rate": 9.784444444444445e-06,
151
- "loss": 0.348,
 
152
  "step": 600
153
  },
154
  {
155
- "epoch": 0.12,
156
- "learning_rate": 9.72888888888889e-06,
157
- "loss": 0.3037,
 
158
  "step": 625
159
  },
160
  {
161
- "epoch": 0.13,
162
- "learning_rate": 9.673333333333334e-06,
163
- "loss": 0.2768,
 
164
  "step": 650
165
  },
166
  {
167
- "epoch": 0.14,
168
- "learning_rate": 9.617777777777778e-06,
169
- "loss": 0.3341,
 
170
  "step": 675
171
  },
172
  {
173
- "epoch": 0.14,
174
- "learning_rate": 9.562222222222223e-06,
175
- "loss": 0.3203,
 
176
  "step": 700
177
  },
178
  {
179
- "epoch": 0.14,
180
- "learning_rate": 9.506666666666667e-06,
181
- "loss": 0.2681,
 
182
  "step": 725
183
  },
184
  {
185
- "epoch": 0.15,
186
- "learning_rate": 9.451111111111112e-06,
187
- "loss": 0.2906,
 
188
  "step": 750
189
  },
190
  {
191
- "epoch": 0.15,
192
- "learning_rate": 9.395555555555556e-06,
193
- "loss": 0.2918,
 
194
  "step": 775
195
  },
196
  {
197
- "epoch": 1.0,
198
- "learning_rate": 9.340000000000002e-06,
199
- "loss": 0.2697,
 
200
  "step": 800
201
  },
202
  {
203
- "epoch": 1.01,
204
- "learning_rate": 9.284444444444444e-06,
205
- "loss": 0.2868,
 
206
  "step": 825
207
  },
208
  {
209
- "epoch": 1.01,
210
- "learning_rate": 9.22888888888889e-06,
211
- "loss": 0.2654,
 
212
  "step": 850
213
  },
214
  {
215
- "epoch": 1.02,
216
- "learning_rate": 9.173333333333334e-06,
217
- "loss": 0.2549,
 
218
  "step": 875
219
  },
220
  {
221
- "epoch": 1.02,
222
- "learning_rate": 9.117777777777778e-06,
223
- "loss": 0.2508,
 
224
  "step": 900
225
  },
226
  {
227
- "epoch": 1.03,
228
- "learning_rate": 9.062222222222224e-06,
229
- "loss": 0.2285,
 
230
  "step": 925
231
  },
232
  {
233
- "epoch": 1.03,
234
- "learning_rate": 9.006666666666666e-06,
235
- "loss": 0.2362,
 
236
  "step": 950
237
  },
238
  {
239
- "epoch": 1.04,
240
- "learning_rate": 8.951111111111112e-06,
241
- "loss": 0.2239,
 
242
  "step": 975
243
  },
244
  {
245
- "epoch": 1.04,
246
- "learning_rate": 8.895555555555556e-06,
247
- "loss": 0.2009,
 
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 1.04,
252
- "eval_loss": 0.24461345374584198,
253
- "eval_runtime": 1705.6758,
254
- "eval_samples_per_second": 7.337,
255
- "eval_steps_per_second": 0.459,
256
- "eval_wer": 17.688063892927563,
257
  "step": 1000
258
  },
259
  {
260
- "epoch": 1.05,
261
- "learning_rate": 8.84e-06,
262
- "loss": 0.196,
 
263
  "step": 1025
264
  },
265
  {
266
- "epoch": 1.05,
267
- "learning_rate": 8.784444444444446e-06,
268
- "loss": 0.2057,
 
269
  "step": 1050
270
  },
271
  {
272
- "epoch": 1.06,
273
- "learning_rate": 8.72888888888889e-06,
274
- "loss": 0.2115,
 
275
  "step": 1075
276
  },
277
  {
278
- "epoch": 1.06,
279
- "learning_rate": 8.673333333333334e-06,
280
- "loss": 0.1891,
 
281
  "step": 1100
282
  },
283
  {
284
- "epoch": 1.07,
285
- "learning_rate": 8.617777777777778e-06,
286
- "loss": 0.1985,
 
287
  "step": 1125
288
  },
289
  {
290
- "epoch": 1.07,
291
- "learning_rate": 8.562222222222224e-06,
292
- "loss": 0.184,
 
293
  "step": 1150
294
  },
295
  {
296
- "epoch": 1.08,
297
- "learning_rate": 8.506666666666668e-06,
298
- "loss": 0.1581,
 
299
  "step": 1175
300
  },
301
  {
302
- "epoch": 1.08,
303
- "learning_rate": 8.451111111111112e-06,
304
- "loss": 0.1609,
 
305
  "step": 1200
306
  },
307
  {
308
- "epoch": 1.09,
309
- "learning_rate": 8.395555555555557e-06,
310
- "loss": 0.1528,
 
311
  "step": 1225
312
  },
313
  {
314
- "epoch": 1.09,
315
- "learning_rate": 8.34e-06,
316
- "loss": 0.1387,
 
317
  "step": 1250
318
  },
319
  {
320
- "epoch": 1.1,
321
- "learning_rate": 8.284444444444446e-06,
322
- "loss": 0.1312,
 
323
  "step": 1275
324
  },
325
  {
326
- "epoch": 1.1,
327
- "learning_rate": 8.22888888888889e-06,
328
- "loss": 0.1471,
 
329
  "step": 1300
330
  },
331
  {
332
- "epoch": 1.11,
333
- "learning_rate": 8.173333333333334e-06,
334
- "loss": 0.1208,
 
335
  "step": 1325
336
  },
337
  {
338
- "epoch": 1.11,
339
- "learning_rate": 8.11777777777778e-06,
340
- "loss": 0.1387,
 
341
  "step": 1350
342
  },
343
  {
344
- "epoch": 1.12,
345
- "learning_rate": 8.062222222222222e-06,
346
- "loss": 0.1393,
 
347
  "step": 1375
348
  },
349
  {
350
- "epoch": 1.12,
351
- "learning_rate": 8.006666666666667e-06,
352
- "loss": 0.1347,
 
353
  "step": 1400
354
  },
355
  {
356
- "epoch": 1.13,
357
- "learning_rate": 7.951111111111111e-06,
358
- "loss": 0.1278,
 
359
  "step": 1425
360
  },
361
  {
362
- "epoch": 1.13,
363
- "learning_rate": 7.895555555555557e-06,
364
- "loss": 0.1528,
 
365
  "step": 1450
366
  },
367
  {
368
- "epoch": 1.14,
369
- "learning_rate": 7.840000000000001e-06,
370
- "loss": 0.1614,
 
371
  "step": 1475
372
  },
373
  {
374
- "epoch": 1.14,
375
- "learning_rate": 7.784444444444445e-06,
376
- "loss": 0.1255,
 
377
  "step": 1500
378
  },
379
  {
380
- "epoch": 1.15,
381
- "learning_rate": 7.72888888888889e-06,
382
- "loss": 0.138,
 
383
  "step": 1525
384
  },
385
  {
386
- "epoch": 1.15,
387
- "learning_rate": 7.673333333333333e-06,
388
- "loss": 0.1341,
 
389
  "step": 1550
390
  },
391
  {
392
- "epoch": 2.0,
393
- "learning_rate": 7.617777777777778e-06,
394
- "loss": 0.1223,
 
395
  "step": 1575
396
  },
397
  {
398
- "epoch": 2.01,
399
- "learning_rate": 7.562222222222223e-06,
400
- "loss": 0.1272,
 
401
  "step": 1600
402
  },
403
  {
404
- "epoch": 2.01,
405
- "learning_rate": 7.506666666666668e-06,
406
- "loss": 0.1324,
 
407
  "step": 1625
408
  },
409
  {
410
- "epoch": 2.02,
411
- "learning_rate": 7.451111111111111e-06,
412
- "loss": 0.1277,
 
413
  "step": 1650
414
  },
415
  {
416
- "epoch": 2.02,
417
- "learning_rate": 7.395555555555556e-06,
418
- "loss": 0.1184,
 
419
  "step": 1675
420
  },
421
  {
422
- "epoch": 2.03,
423
- "learning_rate": 7.340000000000001e-06,
424
- "loss": 0.1117,
 
425
  "step": 1700
426
  },
427
  {
428
- "epoch": 2.03,
429
- "learning_rate": 7.284444444444445e-06,
430
- "loss": 0.1092,
 
431
  "step": 1725
432
  },
433
  {
434
- "epoch": 2.04,
435
- "learning_rate": 7.22888888888889e-06,
436
- "loss": 0.1027,
 
437
  "step": 1750
438
  },
439
  {
440
- "epoch": 2.04,
441
- "learning_rate": 7.173333333333335e-06,
442
- "loss": 0.1018,
 
443
  "step": 1775
444
  },
445
  {
446
- "epoch": 2.05,
447
- "learning_rate": 7.117777777777778e-06,
448
- "loss": 0.0909,
 
449
  "step": 1800
450
  },
451
  {
452
- "epoch": 2.05,
453
- "learning_rate": 7.062222222222223e-06,
454
- "loss": 0.104,
 
455
  "step": 1825
456
  },
457
  {
458
- "epoch": 2.06,
459
- "learning_rate": 7.006666666666667e-06,
460
- "loss": 0.1002,
 
461
  "step": 1850
462
  },
463
  {
464
- "epoch": 2.06,
465
- "learning_rate": 6.951111111111112e-06,
466
- "loss": 0.0964,
 
467
  "step": 1875
468
  },
469
  {
470
- "epoch": 2.07,
471
- "learning_rate": 6.8955555555555565e-06,
472
- "loss": 0.0874,
 
473
  "step": 1900
474
  },
475
  {
476
- "epoch": 2.07,
477
- "learning_rate": 6.8400000000000014e-06,
478
- "loss": 0.0885,
 
479
  "step": 1925
480
  },
481
  {
482
- "epoch": 2.08,
483
- "learning_rate": 6.784444444444445e-06,
484
- "loss": 0.084,
 
485
  "step": 1950
486
  },
487
  {
488
- "epoch": 2.08,
489
- "learning_rate": 6.7288888888888895e-06,
490
- "loss": 0.0766,
 
491
  "step": 1975
492
  },
493
  {
494
- "epoch": 2.09,
495
- "learning_rate": 6.6733333333333335e-06,
496
- "loss": 0.0759,
 
497
  "step": 2000
498
  },
499
  {
500
- "epoch": 2.09,
501
- "eval_loss": 0.21021738648414612,
502
- "eval_runtime": 1689.1729,
503
- "eval_samples_per_second": 7.409,
504
- "eval_steps_per_second": 0.464,
505
- "eval_wer": 14.258399888466212,
506
  "step": 2000
507
  },
508
  {
509
- "epoch": 2.09,
510
- "learning_rate": 6.617777777777778e-06,
511
- "loss": 0.0705,
 
512
  "step": 2025
513
  },
514
  {
515
- "epoch": 2.1,
516
- "learning_rate": 6.562222222222223e-06,
517
- "loss": 0.0634,
 
518
  "step": 2050
519
  },
520
  {
521
- "epoch": 2.1,
522
- "learning_rate": 6.5066666666666665e-06,
523
- "loss": 0.0671,
 
524
  "step": 2075
525
  },
526
  {
527
- "epoch": 2.11,
528
- "learning_rate": 6.451111111111111e-06,
529
- "loss": 0.0658,
 
530
  "step": 2100
531
  },
532
  {
533
- "epoch": 2.11,
534
- "learning_rate": 6.395555555555556e-06,
535
- "loss": 0.0601,
 
536
  "step": 2125
537
  },
538
  {
539
- "epoch": 2.12,
540
- "learning_rate": 6.34e-06,
541
- "loss": 0.0701,
 
542
  "step": 2150
543
  },
544
  {
545
- "epoch": 2.12,
546
- "learning_rate": 6.284444444444445e-06,
547
- "loss": 0.0642,
 
548
  "step": 2175
549
  },
550
  {
551
- "epoch": 2.13,
552
- "learning_rate": 6.22888888888889e-06,
553
- "loss": 0.0612,
 
554
  "step": 2200
555
  },
556
  {
557
- "epoch": 2.13,
558
- "learning_rate": 6.173333333333333e-06,
559
- "loss": 0.0571,
 
560
  "step": 2225
561
  },
562
  {
563
- "epoch": 2.14,
564
- "learning_rate": 6.117777777777778e-06,
565
- "loss": 0.1102,
 
566
  "step": 2250
567
  },
568
  {
569
- "epoch": 2.14,
570
- "learning_rate": 6.062222222222223e-06,
571
- "loss": 0.0744,
 
572
  "step": 2275
573
  },
574
  {
575
- "epoch": 2.15,
576
- "learning_rate": 6.006666666666667e-06,
577
- "loss": 0.0638,
 
578
  "step": 2300
579
  },
580
  {
581
- "epoch": 2.15,
582
- "learning_rate": 5.951111111111112e-06,
583
- "loss": 0.0638,
 
584
  "step": 2325
585
  },
586
  {
587
- "epoch": 3.0,
588
- "learning_rate": 5.895555555555557e-06,
589
- "loss": 0.0601,
 
590
  "step": 2350
591
  },
592
  {
593
- "epoch": 3.01,
594
- "learning_rate": 5.84e-06,
595
- "loss": 0.056,
 
596
  "step": 2375
597
  },
598
  {
599
- "epoch": 3.01,
600
- "learning_rate": 5.784444444444445e-06,
601
- "loss": 0.062,
 
602
  "step": 2400
603
  },
604
  {
605
- "epoch": 3.02,
606
- "learning_rate": 5.72888888888889e-06,
607
- "loss": 0.0645,
 
608
  "step": 2425
609
  },
610
  {
611
- "epoch": 3.02,
612
- "learning_rate": 5.673333333333334e-06,
613
- "loss": 0.0595,
 
614
  "step": 2450
615
  },
616
  {
617
- "epoch": 3.03,
618
- "learning_rate": 5.617777777777779e-06,
619
- "loss": 0.0543,
 
620
  "step": 2475
621
  },
622
  {
623
- "epoch": 3.03,
624
- "learning_rate": 5.562222222222222e-06,
625
- "loss": 0.0525,
 
626
  "step": 2500
627
  },
628
  {
629
- "epoch": 3.04,
630
- "learning_rate": 5.506666666666667e-06,
631
- "loss": 0.0494,
 
632
  "step": 2525
633
  },
634
  {
635
- "epoch": 3.04,
636
- "learning_rate": 5.451111111111112e-06,
637
- "loss": 0.0473,
 
638
  "step": 2550
639
  },
640
  {
641
- "epoch": 3.05,
642
- "learning_rate": 5.3955555555555565e-06,
643
- "loss": 0.0483,
 
644
  "step": 2575
645
  },
646
  {
647
- "epoch": 3.05,
648
- "learning_rate": 5.3400000000000005e-06,
649
- "loss": 0.0467,
 
650
  "step": 2600
651
  },
652
  {
653
- "epoch": 3.06,
654
- "learning_rate": 5.2844444444444454e-06,
655
- "loss": 0.0503,
 
656
  "step": 2625
657
  },
658
  {
659
- "epoch": 3.06,
660
- "learning_rate": 5.228888888888889e-06,
661
- "loss": 0.0428,
 
662
  "step": 2650
663
  },
664
  {
665
- "epoch": 3.07,
666
- "learning_rate": 5.1733333333333335e-06,
667
- "loss": 0.0418,
 
668
  "step": 2675
669
  },
670
  {
671
- "epoch": 3.07,
672
- "learning_rate": 5.117777777777778e-06,
673
- "loss": 0.0424,
 
674
  "step": 2700
675
  },
676
  {
677
- "epoch": 3.08,
678
- "learning_rate": 5.062222222222222e-06,
679
- "loss": 0.0406,
 
680
  "step": 2725
681
  },
682
  {
683
- "epoch": 3.08,
684
- "learning_rate": 5.006666666666667e-06,
685
- "loss": 0.0327,
 
686
  "step": 2750
687
  },
688
  {
689
- "epoch": 3.09,
690
- "learning_rate": 4.951111111111111e-06,
691
- "loss": 0.0371,
 
692
  "step": 2775
693
  },
694
  {
695
- "epoch": 3.09,
696
- "learning_rate": 4.895555555555556e-06,
697
- "loss": 0.0313,
 
698
  "step": 2800
699
  },
700
  {
701
- "epoch": 3.1,
702
- "learning_rate": 4.84e-06,
703
- "loss": 0.0295,
 
704
  "step": 2825
705
  },
706
  {
707
- "epoch": 3.1,
708
- "learning_rate": 4.784444444444445e-06,
709
- "loss": 0.0284,
 
710
  "step": 2850
711
  },
712
  {
713
- "epoch": 3.11,
714
- "learning_rate": 4.728888888888889e-06,
715
- "loss": 0.0322,
 
716
  "step": 2875
717
  },
718
  {
719
- "epoch": 3.11,
720
- "learning_rate": 4.673333333333333e-06,
721
- "loss": 0.0307,
 
722
  "step": 2900
723
  },
724
  {
725
- "epoch": 3.12,
726
- "learning_rate": 4.617777777777778e-06,
727
- "loss": 0.0316,
 
728
  "step": 2925
729
  },
730
  {
731
- "epoch": 3.12,
732
- "learning_rate": 4.562222222222222e-06,
733
- "loss": 0.0286,
 
734
  "step": 2950
735
  },
736
  {
737
- "epoch": 3.13,
738
- "learning_rate": 4.506666666666667e-06,
739
- "loss": 0.0289,
 
740
  "step": 2975
741
  },
742
  {
743
- "epoch": 3.13,
744
- "learning_rate": 4.451111111111112e-06,
745
- "loss": 0.0264,
 
746
  "step": 3000
747
  },
748
  {
749
- "epoch": 3.13,
750
- "eval_loss": 0.2200043797492981,
751
- "eval_runtime": 1693.8446,
752
- "eval_samples_per_second": 7.389,
753
- "eval_steps_per_second": 0.462,
754
- "eval_wer": 13.689776733254993,
755
  "step": 3000
756
  },
757
  {
758
- "epoch": 3.14,
759
- "learning_rate": 4.395555555555556e-06,
760
- "loss": 0.0609,
 
761
  "step": 3025
762
  },
763
  {
764
- "epoch": 3.14,
765
- "learning_rate": 4.34e-06,
766
- "loss": 0.0455,
 
767
  "step": 3050
768
  },
769
  {
770
- "epoch": 3.15,
771
- "learning_rate": 4.284444444444445e-06,
772
- "loss": 0.0323,
 
773
  "step": 3075
774
  },
775
  {
776
- "epoch": 3.15,
777
- "learning_rate": 4.228888888888889e-06,
778
- "loss": 0.0298,
 
779
  "step": 3100
780
  },
781
  {
782
- "epoch": 3.16,
783
- "learning_rate": 4.173333333333334e-06,
784
- "loss": 0.0265,
 
785
  "step": 3125
786
  },
787
  {
788
- "epoch": 4.0,
789
- "learning_rate": 4.117777777777779e-06,
790
- "loss": 0.1155,
 
791
  "step": 3150
792
  },
793
  {
794
- "epoch": 4.01,
795
- "learning_rate": 4.062222222222223e-06,
796
- "loss": 0.1149,
 
797
  "step": 3175
798
  },
799
  {
800
- "epoch": 4.01,
801
- "learning_rate": 4.006666666666667e-06,
802
- "loss": 0.1179,
 
803
  "step": 3200
804
  },
805
  {
806
- "epoch": 4.02,
807
- "learning_rate": 3.951111111111112e-06,
808
- "loss": 0.1228,
 
809
  "step": 3225
810
  },
811
  {
812
- "epoch": 4.02,
813
- "learning_rate": 3.895555555555556e-06,
814
- "loss": 0.1806,
 
815
  "step": 3250
816
  },
817
  {
818
- "epoch": 4.03,
819
- "learning_rate": 3.8400000000000005e-06,
820
- "loss": 0.2425,
 
821
  "step": 3275
822
  },
823
  {
824
- "epoch": 4.03,
825
- "learning_rate": 3.784444444444445e-06,
826
- "loss": 0.1122,
 
827
  "step": 3300
828
  },
829
  {
830
- "epoch": 4.04,
831
- "learning_rate": 3.728888888888889e-06,
832
- "loss": 0.0986,
 
833
  "step": 3325
834
  },
835
  {
836
- "epoch": 4.04,
837
- "learning_rate": 3.673333333333334e-06,
838
- "loss": 0.0942,
 
839
  "step": 3350
840
  },
841
  {
842
- "epoch": 4.05,
843
- "learning_rate": 3.617777777777778e-06,
844
- "loss": 0.1099,
 
845
  "step": 3375
846
  },
847
  {
848
- "epoch": 4.05,
849
- "learning_rate": 3.5622222222222224e-06,
850
- "loss": 0.1332,
 
851
  "step": 3400
852
  },
853
  {
854
- "epoch": 4.06,
855
- "learning_rate": 3.5066666666666673e-06,
856
- "loss": 0.1368,
 
857
  "step": 3425
858
  },
859
  {
860
- "epoch": 4.06,
861
- "learning_rate": 3.4511111111111113e-06,
862
- "loss": 0.164,
 
863
  "step": 3450
864
  },
865
  {
866
- "epoch": 4.07,
867
- "learning_rate": 3.3955555555555558e-06,
868
- "loss": 0.174,
 
869
  "step": 3475
870
  },
871
  {
872
- "epoch": 4.07,
873
- "learning_rate": 3.3400000000000006e-06,
874
- "loss": 0.1675,
 
875
  "step": 3500
876
  },
877
  {
878
- "epoch": 4.08,
879
- "learning_rate": 3.2844444444444447e-06,
880
- "loss": 0.0996,
 
881
  "step": 3525
882
  },
883
  {
884
- "epoch": 4.08,
885
- "learning_rate": 3.228888888888889e-06,
886
- "loss": 0.0795,
 
887
  "step": 3550
888
  },
889
  {
890
- "epoch": 4.09,
891
- "learning_rate": 3.173333333333334e-06,
892
- "loss": 0.0683,
 
893
  "step": 3575
894
  },
895
  {
896
- "epoch": 4.09,
897
- "learning_rate": 3.117777777777778e-06,
898
- "loss": 0.0635,
 
899
  "step": 3600
900
  },
901
  {
902
- "epoch": 4.1,
903
- "learning_rate": 3.0622222222222225e-06,
904
- "loss": 0.0751,
 
905
  "step": 3625
906
  },
907
  {
908
- "epoch": 4.1,
909
- "learning_rate": 3.0066666666666674e-06,
910
- "loss": 0.0712,
 
911
  "step": 3650
912
  },
913
  {
914
- "epoch": 4.11,
915
- "learning_rate": 2.9511111111111114e-06,
916
- "loss": 0.0703,
 
917
  "step": 3675
918
  },
919
  {
920
- "epoch": 4.11,
921
- "learning_rate": 2.895555555555556e-06,
922
- "loss": 0.1152,
 
923
  "step": 3700
924
  },
925
  {
926
- "epoch": 4.12,
927
- "learning_rate": 2.84e-06,
928
- "loss": 0.1039,
 
929
  "step": 3725
930
  },
931
  {
932
- "epoch": 4.12,
933
- "learning_rate": 2.784444444444445e-06,
934
- "loss": 0.0863,
 
935
  "step": 3750
936
  },
937
  {
938
- "epoch": 4.13,
939
- "learning_rate": 2.7288888888888893e-06,
940
- "loss": 0.0882,
 
941
  "step": 3775
942
  },
943
  {
944
- "epoch": 4.13,
945
- "learning_rate": 2.6733333333333333e-06,
946
- "loss": 0.0677,
 
947
  "step": 3800
948
  },
949
  {
950
- "epoch": 4.14,
951
- "learning_rate": 2.617777777777778e-06,
952
- "loss": 0.0511,
 
953
  "step": 3825
954
  },
955
  {
956
- "epoch": 4.14,
957
- "learning_rate": 2.5622222222222226e-06,
958
- "loss": 0.0283,
 
959
  "step": 3850
960
  },
961
  {
962
- "epoch": 4.15,
963
- "learning_rate": 2.5066666666666667e-06,
964
- "loss": 0.0246,
 
965
  "step": 3875
966
  },
967
  {
968
- "epoch": 4.15,
969
- "learning_rate": 2.451111111111111e-06,
970
- "loss": 0.0231,
 
971
  "step": 3900
972
  },
973
  {
974
- "epoch": 5.0,
975
- "learning_rate": 2.3955555555555556e-06,
976
- "loss": 0.0442,
 
977
  "step": 3925
978
  },
979
  {
980
- "epoch": 5.01,
981
- "learning_rate": 2.3400000000000005e-06,
982
- "loss": 0.063,
 
983
  "step": 3950
984
  },
985
  {
986
- "epoch": 5.01,
987
- "learning_rate": 2.2844444444444445e-06,
988
- "loss": 0.0548,
 
989
  "step": 3975
990
  },
991
  {
992
- "epoch": 5.02,
993
- "learning_rate": 2.228888888888889e-06,
994
- "loss": 0.0633,
 
995
  "step": 4000
996
  },
997
  {
998
- "epoch": 5.02,
999
- "eval_loss": 0.1954876184463501,
1000
- "eval_runtime": 1710.419,
1001
- "eval_samples_per_second": 7.317,
1002
- "eval_steps_per_second": 0.458,
1003
- "eval_wer": 12.553526260232228,
1004
  "step": 4000
1005
  },
1006
  {
1007
- "epoch": 5.02,
1008
- "learning_rate": 2.1733333333333334e-06,
1009
- "loss": 0.0801,
 
1010
  "step": 4025
1011
  },
1012
  {
1013
- "epoch": 5.03,
1014
- "learning_rate": 2.117777777777778e-06,
1015
- "loss": 0.1296,
 
1016
  "step": 4050
1017
  },
1018
  {
1019
- "epoch": 5.03,
1020
- "learning_rate": 2.0622222222222223e-06,
1021
- "loss": 0.0944,
 
1022
  "step": 4075
1023
  },
1024
  {
1025
- "epoch": 5.04,
1026
- "learning_rate": 2.006666666666667e-06,
1027
- "loss": 0.0541,
 
1028
  "step": 4100
1029
  },
1030
  {
1031
- "epoch": 5.04,
1032
- "learning_rate": 1.9511111111111113e-06,
1033
- "loss": 0.0511,
 
1034
  "step": 4125
1035
  },
1036
  {
1037
- "epoch": 5.05,
1038
- "learning_rate": 1.8955555555555557e-06,
1039
- "loss": 0.0524,
 
1040
  "step": 4150
1041
  },
1042
  {
1043
- "epoch": 5.05,
1044
- "learning_rate": 1.8400000000000002e-06,
1045
- "loss": 0.0656,
 
1046
  "step": 4175
1047
  },
1048
  {
1049
- "epoch": 5.06,
1050
- "learning_rate": 1.7844444444444444e-06,
1051
- "loss": 0.0675,
 
1052
  "step": 4200
1053
  },
1054
  {
1055
- "epoch": 5.06,
1056
- "learning_rate": 1.728888888888889e-06,
1057
- "loss": 0.0916,
 
1058
  "step": 4225
1059
  },
1060
  {
1061
- "epoch": 5.07,
1062
- "learning_rate": 1.6733333333333335e-06,
1063
- "loss": 0.0977,
 
1064
  "step": 4250
1065
  },
1066
  {
1067
- "epoch": 5.07,
1068
- "learning_rate": 1.6177777777777778e-06,
1069
- "loss": 0.108,
 
1070
  "step": 4275
1071
  },
1072
  {
1073
- "epoch": 5.08,
1074
- "learning_rate": 1.5622222222222225e-06,
1075
- "loss": 0.0672,
 
1076
  "step": 4300
1077
  },
1078
  {
1079
- "epoch": 5.08,
1080
- "learning_rate": 1.506666666666667e-06,
1081
- "loss": 0.0444,
 
1082
  "step": 4325
1083
  },
1084
  {
1085
- "epoch": 5.09,
1086
- "learning_rate": 1.4511111111111112e-06,
1087
- "loss": 0.0371,
 
1088
  "step": 4350
1089
  },
1090
  {
1091
- "epoch": 5.09,
1092
- "learning_rate": 1.3955555555555556e-06,
1093
- "loss": 0.0368,
 
1094
  "step": 4375
1095
  },
1096
  {
1097
- "epoch": 5.1,
1098
- "learning_rate": 1.34e-06,
1099
- "loss": 0.038,
 
1100
  "step": 4400
1101
  },
1102
  {
1103
- "epoch": 5.1,
1104
- "learning_rate": 1.2844444444444445e-06,
1105
- "loss": 0.0366,
 
1106
  "step": 4425
1107
  },
1108
  {
1109
- "epoch": 5.11,
1110
- "learning_rate": 1.228888888888889e-06,
1111
- "loss": 0.041,
 
1112
  "step": 4450
1113
  },
1114
  {
1115
- "epoch": 5.11,
1116
- "learning_rate": 1.1733333333333335e-06,
1117
- "loss": 0.0613,
 
1118
  "step": 4475
1119
  },
1120
  {
1121
- "epoch": 5.12,
1122
- "learning_rate": 1.117777777777778e-06,
1123
- "loss": 0.06,
 
1124
  "step": 4500
1125
  },
1126
  {
1127
- "epoch": 5.12,
1128
- "learning_rate": 1.0622222222222222e-06,
1129
- "loss": 0.0519,
 
1130
  "step": 4525
1131
  },
1132
  {
1133
- "epoch": 5.13,
1134
- "learning_rate": 1.0066666666666668e-06,
1135
- "loss": 0.0475,
 
1136
  "step": 4550
1137
  },
1138
  {
1139
- "epoch": 5.13,
1140
- "learning_rate": 9.511111111111111e-07,
1141
- "loss": 0.0328,
 
1142
  "step": 4575
1143
  },
1144
  {
1145
- "epoch": 5.14,
1146
- "learning_rate": 8.955555555555557e-07,
1147
- "loss": 0.046,
 
1148
  "step": 4600
1149
  },
1150
  {
1151
- "epoch": 5.14,
1152
- "learning_rate": 8.400000000000001e-07,
1153
- "loss": 0.0184,
 
1154
  "step": 4625
1155
  },
1156
  {
1157
- "epoch": 5.15,
1158
- "learning_rate": 7.844444444444445e-07,
1159
- "loss": 0.014,
 
1160
  "step": 4650
1161
  },
1162
  {
1163
- "epoch": 5.15,
1164
- "learning_rate": 7.28888888888889e-07,
1165
- "loss": 0.0138,
 
1166
  "step": 4675
1167
  },
1168
  {
1169
- "epoch": 6.0,
1170
- "learning_rate": 6.733333333333334e-07,
1171
- "loss": 0.0116,
 
1172
  "step": 4700
1173
  },
1174
  {
1175
- "epoch": 6.01,
1176
- "learning_rate": 6.177777777777778e-07,
1177
- "loss": 0.014,
 
1178
  "step": 4725
1179
  },
1180
  {
1181
- "epoch": 6.01,
1182
- "learning_rate": 5.622222222222223e-07,
1183
- "loss": 0.0156,
 
1184
  "step": 4750
1185
  },
1186
  {
1187
- "epoch": 6.02,
1188
- "learning_rate": 5.066666666666667e-07,
1189
- "loss": 0.0136,
 
1190
  "step": 4775
1191
  },
1192
  {
1193
- "epoch": 6.02,
1194
- "learning_rate": 4.511111111111111e-07,
1195
- "loss": 0.0144,
 
1196
  "step": 4800
1197
  },
1198
  {
1199
- "epoch": 6.03,
1200
- "learning_rate": 3.9555555555555557e-07,
1201
- "loss": 0.0155,
 
1202
  "step": 4825
1203
  },
1204
  {
1205
- "epoch": 6.03,
1206
- "learning_rate": 3.4000000000000003e-07,
1207
- "loss": 0.0187,
 
1208
  "step": 4850
1209
  },
1210
  {
1211
- "epoch": 6.04,
1212
- "learning_rate": 2.844444444444445e-07,
1213
- "loss": 0.0189,
 
1214
  "step": 4875
1215
  },
1216
  {
1217
- "epoch": 6.04,
1218
- "learning_rate": 2.2888888888888892e-07,
1219
- "loss": 0.0187,
 
1220
  "step": 4900
1221
  },
1222
  {
1223
- "epoch": 6.05,
1224
- "learning_rate": 1.7333333333333335e-07,
1225
- "loss": 0.0194,
 
1226
  "step": 4925
1227
  },
1228
  {
1229
- "epoch": 6.05,
1230
- "learning_rate": 1.1777777777777778e-07,
1231
- "loss": 0.019,
 
1232
  "step": 4950
1233
  },
1234
  {
1235
- "epoch": 6.06,
1236
- "learning_rate": 6.222222222222223e-08,
1237
- "loss": 0.0183,
 
1238
  "step": 4975
1239
  },
1240
  {
1241
- "epoch": 6.06,
1242
- "learning_rate": 6.666666666666667e-09,
1243
- "loss": 0.0199,
 
1244
  "step": 5000
1245
  },
1246
  {
1247
- "epoch": 6.06,
1248
- "eval_loss": 0.19964517652988434,
1249
- "eval_runtime": 1705.0556,
1250
- "eval_samples_per_second": 7.34,
1251
- "eval_steps_per_second": 0.459,
1252
- "eval_wer": 12.012786552211754,
1253
  "step": 5000
1254
  },
1255
  {
1256
- "epoch": 6.06,
1257
- "step": 5000,
1258
- "total_flos": 4.612864472875008e+19,
1259
- "train_loss": 0.15837200704216958,
1260
- "train_runtime": 29005.3522,
1261
- "train_samples_per_second": 5.516,
1262
- "train_steps_per_second": 0.172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
  }
1264
  ],
1265
- "max_steps": 5000,
 
 
1266
  "num_train_epochs": 9223372036854775807,
1267
- "total_flos": 4.612864472875008e+19,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1268
  "trial_name": null,
1269
  "trial_params": null
1270
  }
 
1
  {
2
+ "best_metric": 10.886229784051602,
3
+ "best_model_checkpoint": "./checkpoint-8000",
4
+ "epoch": 1.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.003125,
13
+ "grad_norm": 17.579944610595703,
14
  "learning_rate": 4.4e-07,
15
+ "loss": 2.3284,
16
  "step": 25
17
  },
18
  {
19
+ "epoch": 0.00625,
20
+ "grad_norm": 9.753120422363281,
21
  "learning_rate": 9.400000000000001e-07,
22
+ "loss": 1.9145,
23
  "step": 50
24
  },
25
  {
26
+ "epoch": 0.009375,
27
+ "grad_norm": 9.469987869262695,
28
  "learning_rate": 1.44e-06,
29
+ "loss": 1.2892,
30
  "step": 75
31
  },
32
  {
33
+ "epoch": 0.0125,
34
+ "grad_norm": 6.952774524688721,
35
  "learning_rate": 1.94e-06,
36
+ "loss": 0.9797,
37
  "step": 100
38
  },
39
  {
40
+ "epoch": 0.015625,
41
+ "grad_norm": 6.080902576446533,
42
  "learning_rate": 2.4400000000000004e-06,
43
+ "loss": 0.8265,
44
  "step": 125
45
  },
46
  {
47
+ "epoch": 0.01875,
48
+ "grad_norm": 5.6766037940979,
49
  "learning_rate": 2.9400000000000002e-06,
50
+ "loss": 0.6998,
51
  "step": 150
52
  },
53
  {
54
+ "epoch": 0.021875,
55
+ "grad_norm": 5.372249126434326,
56
  "learning_rate": 3.44e-06,
57
+ "loss": 0.6537,
58
  "step": 175
59
  },
60
  {
61
+ "epoch": 0.025,
62
+ "grad_norm": 5.710323810577393,
63
  "learning_rate": 3.94e-06,
64
+ "loss": 0.6149,
65
  "step": 200
66
  },
67
  {
68
+ "epoch": 0.028125,
69
+ "grad_norm": 5.235953330993652,
70
  "learning_rate": 4.440000000000001e-06,
71
+ "loss": 0.5256,
72
  "step": 225
73
  },
74
  {
75
+ "epoch": 0.03125,
76
+ "grad_norm": 6.58635950088501,
77
  "learning_rate": 4.94e-06,
78
+ "loss": 0.54,
79
  "step": 250
80
  },
81
  {
82
+ "epoch": 0.034375,
83
+ "grad_norm": 5.4912004470825195,
84
  "learning_rate": 5.4400000000000004e-06,
85
+ "loss": 0.5521,
86
  "step": 275
87
  },
88
  {
89
+ "epoch": 0.0375,
90
+ "grad_norm": 5.846869945526123,
91
  "learning_rate": 5.94e-06,
92
+ "loss": 0.5379,
93
  "step": 300
94
  },
95
  {
96
+ "epoch": 0.040625,
97
+ "grad_norm": 5.060309410095215,
98
  "learning_rate": 6.440000000000001e-06,
99
+ "loss": 0.4778,
100
  "step": 325
101
  },
102
  {
103
+ "epoch": 0.04375,
104
+ "grad_norm": 5.06487512588501,
105
  "learning_rate": 6.9400000000000005e-06,
106
+ "loss": 0.4152,
107
  "step": 350
108
  },
109
  {
110
+ "epoch": 0.046875,
111
+ "grad_norm": 4.936045169830322,
112
  "learning_rate": 7.440000000000001e-06,
113
+ "loss": 0.3547,
114
  "step": 375
115
  },
116
  {
117
+ "epoch": 0.05,
118
+ "grad_norm": 3.8072471618652344,
119
  "learning_rate": 7.94e-06,
120
+ "loss": 0.3428,
121
  "step": 400
122
  },
123
  {
124
+ "epoch": 0.053125,
125
+ "grad_norm": 3.9378795623779297,
126
  "learning_rate": 8.44e-06,
127
+ "loss": 0.3099,
128
  "step": 425
129
  },
130
  {
131
+ "epoch": 0.05625,
132
+ "grad_norm": 3.732869863510132,
133
  "learning_rate": 8.94e-06,
134
+ "loss": 0.2963,
135
  "step": 450
136
  },
137
  {
138
+ "epoch": 0.059375,
139
+ "grad_norm": 3.9596025943756104,
140
  "learning_rate": 9.440000000000001e-06,
141
+ "loss": 0.2745,
142
  "step": 475
143
  },
144
  {
145
+ "epoch": 0.0625,
146
+ "grad_norm": 3.428398370742798,
147
  "learning_rate": 9.940000000000001e-06,
148
+ "loss": 0.2626,
149
  "step": 500
150
  },
151
  {
152
+ "epoch": 0.065625,
153
+ "grad_norm": 5.03747034072876,
154
+ "learning_rate": 9.970666666666668e-06,
155
+ "loss": 0.2411,
156
  "step": 525
157
  },
158
  {
159
+ "epoch": 0.06875,
160
+ "grad_norm": 3.2012217044830322,
161
+ "learning_rate": 9.937333333333334e-06,
162
+ "loss": 0.2389,
163
  "step": 550
164
  },
165
  {
166
+ "epoch": 0.071875,
167
+ "grad_norm": 3.7361278533935547,
168
+ "learning_rate": 9.904e-06,
169
+ "loss": 0.2217,
170
  "step": 575
171
  },
172
  {
173
+ "epoch": 0.075,
174
+ "grad_norm": 4.509885787963867,
175
+ "learning_rate": 9.870666666666667e-06,
176
+ "loss": 0.2246,
177
  "step": 600
178
  },
179
  {
180
+ "epoch": 0.078125,
181
+ "grad_norm": 3.462961435317993,
182
+ "learning_rate": 9.837333333333335e-06,
183
+ "loss": 0.199,
184
  "step": 625
185
  },
186
  {
187
+ "epoch": 0.08125,
188
+ "grad_norm": 2.764691114425659,
189
+ "learning_rate": 9.804000000000001e-06,
190
+ "loss": 0.2156,
191
  "step": 650
192
  },
193
  {
194
+ "epoch": 0.084375,
195
+ "grad_norm": 3.059408187866211,
196
+ "learning_rate": 9.770666666666668e-06,
197
+ "loss": 0.212,
198
  "step": 675
199
  },
200
  {
201
+ "epoch": 0.0875,
202
+ "grad_norm": 3.952425718307495,
203
+ "learning_rate": 9.737333333333334e-06,
204
+ "loss": 0.2123,
205
  "step": 700
206
  },
207
  {
208
+ "epoch": 0.090625,
209
+ "grad_norm": 4.892609119415283,
210
+ "learning_rate": 9.704e-06,
211
+ "loss": 0.2343,
212
  "step": 725
213
  },
214
  {
215
+ "epoch": 0.09375,
216
+ "grad_norm": 4.592615127563477,
217
+ "learning_rate": 9.670666666666667e-06,
218
+ "loss": 0.3308,
219
  "step": 750
220
  },
221
  {
222
+ "epoch": 0.096875,
223
+ "grad_norm": 4.663967132568359,
224
+ "learning_rate": 9.637333333333333e-06,
225
+ "loss": 0.3146,
226
  "step": 775
227
  },
228
  {
229
+ "epoch": 0.1,
230
+ "grad_norm": 5.091048717498779,
231
+ "learning_rate": 9.604000000000002e-06,
232
+ "loss": 0.3519,
233
  "step": 800
234
  },
235
  {
236
+ "epoch": 0.103125,
237
+ "grad_norm": 3.8216071128845215,
238
+ "learning_rate": 9.570666666666666e-06,
239
+ "loss": 0.2365,
240
  "step": 825
241
  },
242
  {
243
+ "epoch": 0.10625,
244
+ "grad_norm": 3.122516393661499,
245
+ "learning_rate": 9.537333333333334e-06,
246
+ "loss": 0.193,
247
  "step": 850
248
  },
249
  {
250
+ "epoch": 0.109375,
251
+ "grad_norm": 2.657339096069336,
252
+ "learning_rate": 9.504e-06,
253
+ "loss": 0.1759,
254
  "step": 875
255
  },
256
  {
257
+ "epoch": 0.1125,
258
+ "grad_norm": 4.554510116577148,
259
+ "learning_rate": 9.470666666666667e-06,
260
+ "loss": 0.2387,
261
  "step": 900
262
  },
263
  {
264
+ "epoch": 0.115625,
265
+ "grad_norm": 5.045220851898193,
266
+ "learning_rate": 9.437333333333334e-06,
267
+ "loss": 0.2845,
268
  "step": 925
269
  },
270
  {
271
+ "epoch": 0.11875,
272
+ "grad_norm": 4.260054588317871,
273
+ "learning_rate": 9.404e-06,
274
+ "loss": 0.2755,
275
  "step": 950
276
  },
277
  {
278
+ "epoch": 0.121875,
279
+ "grad_norm": 5.8209147453308105,
280
+ "learning_rate": 9.370666666666668e-06,
281
+ "loss": 0.481,
282
  "step": 975
283
  },
284
  {
285
+ "epoch": 0.125,
286
+ "grad_norm": 5.498444557189941,
287
+ "learning_rate": 9.337333333333335e-06,
288
+ "loss": 0.3998,
289
  "step": 1000
290
  },
291
  {
292
+ "epoch": 0.125,
293
+ "eval_loss": 0.36512792110443115,
294
+ "eval_runtime": 153.2646,
295
+ "eval_samples_per_second": 13.728,
296
+ "eval_steps_per_second": 0.861,
297
+ "eval_wer": 21.50135552023932,
298
  "step": 1000
299
  },
300
  {
301
+ "epoch": 0.128125,
302
+ "grad_norm": 4.732964515686035,
303
+ "learning_rate": 9.304000000000001e-06,
304
+ "loss": 0.329,
305
  "step": 1025
306
  },
307
  {
308
+ "epoch": 0.13125,
309
+ "grad_norm": 3.3556125164031982,
310
+ "learning_rate": 9.270666666666667e-06,
311
+ "loss": 0.2319,
312
  "step": 1050
313
  },
314
  {
315
+ "epoch": 0.134375,
316
+ "grad_norm": 2.9708847999572754,
317
+ "learning_rate": 9.237333333333334e-06,
318
+ "loss": 0.174,
319
  "step": 1075
320
  },
321
  {
322
+ "epoch": 0.1375,
323
+ "grad_norm": 2.841306447982788,
324
+ "learning_rate": 9.204e-06,
325
+ "loss": 0.1447,
326
  "step": 1100
327
  },
328
  {
329
+ "epoch": 0.140625,
330
+ "grad_norm": 2.7909176349639893,
331
+ "learning_rate": 9.170666666666668e-06,
332
+ "loss": 0.1406,
333
  "step": 1125
334
  },
335
  {
336
+ "epoch": 0.14375,
337
+ "grad_norm": 3.37842059135437,
338
+ "learning_rate": 9.137333333333333e-06,
339
+ "loss": 0.151,
340
  "step": 1150
341
  },
342
  {
343
+ "epoch": 0.146875,
344
+ "grad_norm": 3.023977041244507,
345
+ "learning_rate": 9.104000000000001e-06,
346
+ "loss": 0.1529,
347
  "step": 1175
348
  },
349
  {
350
+ "epoch": 0.15,
351
+ "grad_norm": 3.015974283218384,
352
+ "learning_rate": 9.070666666666668e-06,
353
+ "loss": 0.1496,
354
  "step": 1200
355
  },
356
  {
357
+ "epoch": 0.153125,
358
+ "grad_norm": 4.30889892578125,
359
+ "learning_rate": 9.037333333333334e-06,
360
+ "loss": 0.219,
361
  "step": 1225
362
  },
363
  {
364
+ "epoch": 0.15625,
365
+ "grad_norm": 4.160729885101318,
366
+ "learning_rate": 9.004e-06,
367
+ "loss": 0.238,
368
  "step": 1250
369
  },
370
  {
371
+ "epoch": 0.159375,
372
+ "grad_norm": 4.687659740447998,
373
+ "learning_rate": 8.970666666666667e-06,
374
+ "loss": 0.2603,
375
  "step": 1275
376
  },
377
  {
378
+ "epoch": 0.1625,
379
+ "grad_norm": 4.577232837677002,
380
+ "learning_rate": 8.937333333333335e-06,
381
+ "loss": 0.2666,
382
  "step": 1300
383
  },
384
  {
385
+ "epoch": 0.165625,
386
+ "grad_norm": 5.091732501983643,
387
+ "learning_rate": 8.904e-06,
388
+ "loss": 0.2337,
389
  "step": 1325
390
  },
391
  {
392
+ "epoch": 0.16875,
393
+ "grad_norm": 4.125801086425781,
394
+ "learning_rate": 8.870666666666668e-06,
395
+ "loss": 0.2379,
396
  "step": 1350
397
  },
398
  {
399
+ "epoch": 0.171875,
400
+ "grad_norm": 5.142183303833008,
401
+ "learning_rate": 8.837333333333334e-06,
402
+ "loss": 0.2215,
403
  "step": 1375
404
  },
405
  {
406
+ "epoch": 0.175,
407
+ "grad_norm": 4.486277103424072,
408
+ "learning_rate": 8.804e-06,
409
+ "loss": 0.2136,
410
  "step": 1400
411
  },
412
  {
413
+ "epoch": 0.178125,
414
+ "grad_norm": 3.5466482639312744,
415
+ "learning_rate": 8.770666666666667e-06,
416
+ "loss": 0.2214,
417
  "step": 1425
418
  },
419
  {
420
+ "epoch": 0.18125,
421
+ "grad_norm": 3.6199097633361816,
422
+ "learning_rate": 8.737333333333334e-06,
423
+ "loss": 0.2113,
424
  "step": 1450
425
  },
426
  {
427
+ "epoch": 0.184375,
428
+ "grad_norm": 2.559951066970825,
429
+ "learning_rate": 8.704e-06,
430
+ "loss": 0.1552,
431
  "step": 1475
432
  },
433
  {
434
+ "epoch": 0.1875,
435
+ "grad_norm": 2.9152133464813232,
436
+ "learning_rate": 8.670666666666666e-06,
437
+ "loss": 0.1354,
438
  "step": 1500
439
  },
440
  {
441
+ "epoch": 0.190625,
442
+ "grad_norm": 2.608732223510742,
443
+ "learning_rate": 8.637333333333335e-06,
444
+ "loss": 0.144,
445
  "step": 1525
446
  },
447
  {
448
+ "epoch": 0.19375,
449
+ "grad_norm": 4.0043416023254395,
450
+ "learning_rate": 8.604000000000001e-06,
451
+ "loss": 0.1367,
452
  "step": 1550
453
  },
454
  {
455
+ "epoch": 0.196875,
456
+ "grad_norm": 2.3621206283569336,
457
+ "learning_rate": 8.570666666666667e-06,
458
+ "loss": 0.1194,
459
  "step": 1575
460
  },
461
  {
462
+ "epoch": 0.2,
463
+ "grad_norm": 2.6970181465148926,
464
+ "learning_rate": 8.537333333333334e-06,
465
+ "loss": 0.1283,
466
  "step": 1600
467
  },
468
  {
469
+ "epoch": 0.203125,
470
+ "grad_norm": 4.737370014190674,
471
+ "learning_rate": 8.504000000000002e-06,
472
+ "loss": 0.1858,
473
  "step": 1625
474
  },
475
  {
476
+ "epoch": 0.20625,
477
+ "grad_norm": 3.462738513946533,
478
+ "learning_rate": 8.470666666666667e-06,
479
+ "loss": 0.1995,
480
  "step": 1650
481
  },
482
  {
483
+ "epoch": 0.209375,
484
+ "grad_norm": 4.608364582061768,
485
+ "learning_rate": 8.437333333333335e-06,
486
+ "loss": 0.2028,
487
  "step": 1675
488
  },
489
  {
490
+ "epoch": 0.2125,
491
+ "grad_norm": 2.770601987838745,
492
+ "learning_rate": 8.404000000000001e-06,
493
+ "loss": 0.1952,
494
  "step": 1700
495
  },
496
  {
497
+ "epoch": 0.215625,
498
+ "grad_norm": 3.041656017303467,
499
+ "learning_rate": 8.370666666666668e-06,
500
+ "loss": 0.1464,
501
  "step": 1725
502
  },
503
  {
504
+ "epoch": 0.21875,
505
+ "grad_norm": 2.988032102584839,
506
+ "learning_rate": 8.337333333333334e-06,
507
+ "loss": 0.1424,
508
  "step": 1750
509
  },
510
  {
511
+ "epoch": 0.221875,
512
+ "grad_norm": 3.0646026134490967,
513
+ "learning_rate": 8.304e-06,
514
+ "loss": 0.1233,
515
  "step": 1775
516
  },
517
  {
518
+ "epoch": 0.225,
519
+ "grad_norm": 2.617403268814087,
520
+ "learning_rate": 8.270666666666667e-06,
521
+ "loss": 0.1384,
522
  "step": 1800
523
  },
524
  {
525
+ "epoch": 0.228125,
526
+ "grad_norm": 2.6170425415039062,
527
+ "learning_rate": 8.237333333333333e-06,
528
+ "loss": 0.1208,
529
  "step": 1825
530
  },
531
  {
532
+ "epoch": 0.23125,
533
+ "grad_norm": 2.1296098232269287,
534
+ "learning_rate": 8.204000000000001e-06,
535
+ "loss": 0.1176,
536
  "step": 1850
537
  },
538
  {
539
+ "epoch": 0.234375,
540
+ "grad_norm": 2.767275810241699,
541
+ "learning_rate": 8.170666666666668e-06,
542
+ "loss": 0.1189,
543
  "step": 1875
544
  },
545
  {
546
+ "epoch": 0.2375,
547
+ "grad_norm": 2.7053661346435547,
548
+ "learning_rate": 8.137333333333334e-06,
549
+ "loss": 0.1211,
550
  "step": 1900
551
  },
552
  {
553
+ "epoch": 0.240625,
554
+ "grad_norm": 2.281399965286255,
555
+ "learning_rate": 8.104e-06,
556
+ "loss": 0.1156,
557
  "step": 1925
558
  },
559
  {
560
+ "epoch": 0.24375,
561
+ "grad_norm": 3.7013635635375977,
562
+ "learning_rate": 8.070666666666667e-06,
563
+ "loss": 0.1517,
564
  "step": 1950
565
  },
566
  {
567
+ "epoch": 0.246875,
568
+ "grad_norm": 3.7125532627105713,
569
+ "learning_rate": 8.037333333333334e-06,
570
+ "loss": 0.2002,
571
  "step": 1975
572
  },
573
  {
574
+ "epoch": 0.25,
575
+ "grad_norm": 3.8716859817504883,
576
+ "learning_rate": 8.004e-06,
577
+ "loss": 0.1975,
578
  "step": 2000
579
  },
580
  {
581
+ "epoch": 0.25,
582
+ "eval_loss": 0.2918355464935303,
583
+ "eval_runtime": 153.1763,
584
+ "eval_samples_per_second": 13.736,
585
+ "eval_steps_per_second": 0.862,
586
+ "eval_wer": 15.873609423202767,
587
  "step": 2000
588
  },
589
  {
590
+ "epoch": 0.253125,
591
+ "grad_norm": 2.4911813735961914,
592
+ "learning_rate": 7.970666666666668e-06,
593
+ "loss": 0.1648,
594
  "step": 2025
595
  },
596
  {
597
+ "epoch": 0.25625,
598
+ "grad_norm": 2.604146718978882,
599
+ "learning_rate": 7.937333333333333e-06,
600
+ "loss": 0.1162,
601
  "step": 2050
602
  },
603
  {
604
+ "epoch": 0.259375,
605
+ "grad_norm": 2.7352280616760254,
606
+ "learning_rate": 7.904000000000001e-06,
607
+ "loss": 0.1135,
608
  "step": 2075
609
  },
610
  {
611
+ "epoch": 0.2625,
612
+ "grad_norm": 2.2932169437408447,
613
+ "learning_rate": 7.870666666666667e-06,
614
+ "loss": 0.1153,
615
  "step": 2100
616
  },
617
  {
618
+ "epoch": 0.265625,
619
+ "grad_norm": 3.1734797954559326,
620
+ "learning_rate": 7.837333333333334e-06,
621
+ "loss": 0.1005,
622
  "step": 2125
623
  },
624
  {
625
+ "epoch": 0.26875,
626
+ "grad_norm": 2.4353103637695312,
627
+ "learning_rate": 7.804e-06,
628
+ "loss": 0.0988,
629
  "step": 2150
630
  },
631
  {
632
+ "epoch": 0.271875,
633
+ "grad_norm": 2.8655478954315186,
634
+ "learning_rate": 7.770666666666668e-06,
635
+ "loss": 0.1028,
636
  "step": 2175
637
  },
638
  {
639
+ "epoch": 0.275,
640
+ "grad_norm": 3.800967216491699,
641
+ "learning_rate": 7.737333333333335e-06,
642
+ "loss": 0.1751,
643
  "step": 2200
644
  },
645
  {
646
+ "epoch": 0.278125,
647
+ "grad_norm": 4.212419509887695,
648
+ "learning_rate": 7.704000000000001e-06,
649
+ "loss": 0.1798,
650
  "step": 2225
651
  },
652
  {
653
+ "epoch": 0.28125,
654
+ "grad_norm": 3.5863020420074463,
655
+ "learning_rate": 7.670666666666668e-06,
656
+ "loss": 0.199,
657
  "step": 2250
658
  },
659
  {
660
+ "epoch": 0.284375,
661
+ "grad_norm": 3.1013996601104736,
662
+ "learning_rate": 7.637333333333334e-06,
663
+ "loss": 0.1335,
664
  "step": 2275
665
  },
666
  {
667
+ "epoch": 0.2875,
668
+ "grad_norm": 2.2462713718414307,
669
+ "learning_rate": 7.604e-06,
670
+ "loss": 0.0976,
671
  "step": 2300
672
  },
673
  {
674
+ "epoch": 0.290625,
675
+ "grad_norm": 2.9669203758239746,
676
+ "learning_rate": 7.570666666666668e-06,
677
+ "loss": 0.0946,
678
  "step": 2325
679
  },
680
  {
681
+ "epoch": 0.29375,
682
+ "grad_norm": 2.645289897918701,
683
+ "learning_rate": 7.537333333333334e-06,
684
+ "loss": 0.0935,
685
  "step": 2350
686
  },
687
  {
688
+ "epoch": 0.296875,
689
+ "grad_norm": 1.9715274572372437,
690
+ "learning_rate": 7.5040000000000005e-06,
691
+ "loss": 0.1045,
692
  "step": 2375
693
  },
694
  {
695
+ "epoch": 0.3,
696
+ "grad_norm": 2.1423373222351074,
697
+ "learning_rate": 7.470666666666667e-06,
698
+ "loss": 0.0977,
699
  "step": 2400
700
  },
701
  {
702
+ "epoch": 0.303125,
703
+ "grad_norm": 2.029958963394165,
704
+ "learning_rate": 7.437333333333334e-06,
705
+ "loss": 0.1061,
706
  "step": 2425
707
  },
708
  {
709
+ "epoch": 0.30625,
710
+ "grad_norm": 1.972732663154602,
711
+ "learning_rate": 7.404e-06,
712
+ "loss": 0.0998,
713
  "step": 2450
714
  },
715
  {
716
+ "epoch": 0.309375,
717
+ "grad_norm": 2.2875239849090576,
718
+ "learning_rate": 7.370666666666667e-06,
719
+ "loss": 0.1068,
720
  "step": 2475
721
  },
722
  {
723
+ "epoch": 0.3125,
724
+ "grad_norm": 3.1778981685638428,
725
+ "learning_rate": 7.337333333333334e-06,
726
+ "loss": 0.1168,
727
  "step": 2500
728
  },
729
  {
730
+ "epoch": 0.315625,
731
+ "grad_norm": 3.360576868057251,
732
+ "learning_rate": 7.304000000000001e-06,
733
+ "loss": 0.1524,
734
  "step": 2525
735
  },
736
  {
737
+ "epoch": 0.31875,
738
+ "grad_norm": 3.5467047691345215,
739
+ "learning_rate": 7.270666666666667e-06,
740
+ "loss": 0.1483,
741
  "step": 2550
742
  },
743
  {
744
+ "epoch": 0.321875,
745
+ "grad_norm": 3.488696575164795,
746
+ "learning_rate": 7.237333333333334e-06,
747
+ "loss": 0.1775,
748
  "step": 2575
749
  },
750
  {
751
+ "epoch": 0.325,
752
+ "grad_norm": 2.8800296783447266,
753
+ "learning_rate": 7.204000000000001e-06,
754
+ "loss": 0.135,
755
  "step": 2600
756
  },
757
  {
758
+ "epoch": 0.328125,
759
+ "grad_norm": 3.1020660400390625,
760
+ "learning_rate": 7.170666666666667e-06,
761
+ "loss": 0.1108,
762
  "step": 2625
763
  },
764
  {
765
+ "epoch": 0.33125,
766
+ "grad_norm": 2.1233720779418945,
767
+ "learning_rate": 7.137333333333334e-06,
768
+ "loss": 0.1002,
769
  "step": 2650
770
  },
771
  {
772
+ "epoch": 0.334375,
773
+ "grad_norm": 2.393425703048706,
774
+ "learning_rate": 7.104000000000001e-06,
775
+ "loss": 0.0941,
776
  "step": 2675
777
  },
778
  {
779
+ "epoch": 0.3375,
780
+ "grad_norm": 2.295924186706543,
781
+ "learning_rate": 7.0706666666666665e-06,
782
+ "loss": 0.0959,
783
  "step": 2700
784
  },
785
  {
786
+ "epoch": 0.340625,
787
+ "grad_norm": 1.8125039339065552,
788
+ "learning_rate": 7.037333333333334e-06,
789
+ "loss": 0.1116,
790
  "step": 2725
791
  },
792
  {
793
+ "epoch": 0.34375,
794
+ "grad_norm": 3.006834030151367,
795
+ "learning_rate": 7.004000000000001e-06,
796
+ "loss": 0.1146,
797
  "step": 2750
798
  },
799
  {
800
+ "epoch": 0.346875,
801
+ "grad_norm": 4.171006679534912,
802
+ "learning_rate": 6.970666666666667e-06,
803
+ "loss": 0.2029,
804
  "step": 2775
805
  },
806
  {
807
+ "epoch": 0.35,
808
+ "grad_norm": 3.68646240234375,
809
+ "learning_rate": 6.937333333333334e-06,
810
+ "loss": 0.1913,
811
  "step": 2800
812
  },
813
  {
814
+ "epoch": 0.353125,
815
+ "grad_norm": 3.7463300228118896,
816
+ "learning_rate": 6.904e-06,
817
+ "loss": 0.16,
818
  "step": 2825
819
  },
820
  {
821
+ "epoch": 0.35625,
822
+ "grad_norm": 3.069136381149292,
823
+ "learning_rate": 6.8706666666666676e-06,
824
+ "loss": 0.1571,
825
  "step": 2850
826
  },
827
  {
828
+ "epoch": 0.359375,
829
+ "grad_norm": 3.17172908782959,
830
+ "learning_rate": 6.837333333333334e-06,
831
+ "loss": 0.1608,
832
  "step": 2875
833
  },
834
  {
835
+ "epoch": 0.3625,
836
+ "grad_norm": 3.1673102378845215,
837
+ "learning_rate": 6.804e-06,
838
+ "loss": 0.1546,
839
  "step": 2900
840
  },
841
  {
842
+ "epoch": 0.365625,
843
+ "grad_norm": 2.344193935394287,
844
+ "learning_rate": 6.770666666666668e-06,
845
+ "loss": 0.1282,
846
  "step": 2925
847
  },
848
  {
849
+ "epoch": 0.36875,
850
+ "grad_norm": 2.5321226119995117,
851
+ "learning_rate": 6.737333333333333e-06,
852
+ "loss": 0.0979,
853
  "step": 2950
854
  },
855
  {
856
+ "epoch": 0.371875,
857
+ "grad_norm": 2.2652363777160645,
858
+ "learning_rate": 6.7040000000000005e-06,
859
+ "loss": 0.1049,
860
  "step": 2975
861
  },
862
  {
863
+ "epoch": 0.375,
864
+ "grad_norm": 2.7856993675231934,
865
+ "learning_rate": 6.670666666666668e-06,
866
+ "loss": 0.1433,
867
  "step": 3000
868
  },
869
  {
870
+ "epoch": 0.375,
871
+ "eval_loss": 0.2720916271209717,
872
+ "eval_runtime": 151.7576,
873
+ "eval_samples_per_second": 13.864,
874
+ "eval_steps_per_second": 0.87,
875
+ "eval_wer": 13.9010937646069,
876
  "step": 3000
877
  },
878
  {
879
+ "epoch": 0.378125,
880
+ "grad_norm": 4.214677810668945,
881
+ "learning_rate": 6.637333333333333e-06,
882
+ "loss": 0.1758,
883
  "step": 3025
884
  },
885
  {
886
+ "epoch": 0.38125,
887
+ "grad_norm": 4.144543647766113,
888
+ "learning_rate": 6.604000000000001e-06,
889
+ "loss": 0.1972,
890
  "step": 3050
891
  },
892
  {
893
+ "epoch": 0.384375,
894
+ "grad_norm": 2.1775295734405518,
895
+ "learning_rate": 6.570666666666667e-06,
896
+ "loss": 0.1293,
897
  "step": 3075
898
  },
899
  {
900
+ "epoch": 0.3875,
901
+ "grad_norm": 2.796152353286743,
902
+ "learning_rate": 6.537333333333334e-06,
903
+ "loss": 0.099,
904
  "step": 3100
905
  },
906
  {
907
+ "epoch": 0.390625,
908
+ "grad_norm": 2.1920204162597656,
909
+ "learning_rate": 6.504e-06,
910
+ "loss": 0.0945,
911
  "step": 3125
912
  },
913
  {
914
+ "epoch": 0.39375,
915
+ "grad_norm": 2.8689582347869873,
916
+ "learning_rate": 6.470666666666667e-06,
917
+ "loss": 0.1118,
918
  "step": 3150
919
  },
920
  {
921
+ "epoch": 0.396875,
922
+ "grad_norm": 3.580993175506592,
923
+ "learning_rate": 6.4373333333333344e-06,
924
+ "loss": 0.1732,
925
  "step": 3175
926
  },
927
  {
928
+ "epoch": 0.4,
929
+ "grad_norm": 3.9165573120117188,
930
+ "learning_rate": 6.404e-06,
931
+ "loss": 0.1581,
932
  "step": 3200
933
  },
934
  {
935
+ "epoch": 0.403125,
936
+ "grad_norm": 3.8235292434692383,
937
+ "learning_rate": 6.370666666666667e-06,
938
+ "loss": 0.1716,
939
  "step": 3225
940
  },
941
  {
942
+ "epoch": 0.40625,
943
+ "grad_norm": 3.21138072013855,
944
+ "learning_rate": 6.3373333333333345e-06,
945
+ "loss": 0.1364,
946
  "step": 3250
947
  },
948
  {
949
+ "epoch": 0.409375,
950
+ "grad_norm": 3.925539255142212,
951
+ "learning_rate": 6.304e-06,
952
+ "loss": 0.1459,
953
  "step": 3275
954
  },
955
  {
956
+ "epoch": 0.4125,
957
+ "grad_norm": 3.062764883041382,
958
+ "learning_rate": 6.270666666666667e-06,
959
+ "loss": 0.1668,
960
  "step": 3300
961
  },
962
  {
963
+ "epoch": 0.415625,
964
+ "grad_norm": 2.8379392623901367,
965
+ "learning_rate": 6.237333333333334e-06,
966
+ "loss": 0.1243,
967
  "step": 3325
968
  },
969
  {
970
+ "epoch": 0.41875,
971
+ "grad_norm": 2.979661226272583,
972
+ "learning_rate": 6.204e-06,
973
+ "loss": 0.0979,
974
  "step": 3350
975
  },
976
  {
977
+ "epoch": 0.421875,
978
+ "grad_norm": 2.4838883876800537,
979
+ "learning_rate": 6.170666666666667e-06,
980
+ "loss": 0.0848,
981
  "step": 3375
982
  },
983
  {
984
+ "epoch": 0.425,
985
+ "grad_norm": 2.3293073177337646,
986
+ "learning_rate": 6.137333333333334e-06,
987
+ "loss": 0.0927,
988
  "step": 3400
989
  },
990
  {
991
+ "epoch": 0.428125,
992
+ "grad_norm": 3.3497400283813477,
993
+ "learning_rate": 6.104000000000001e-06,
994
+ "loss": 0.0976,
995
  "step": 3425
996
  },
997
  {
998
+ "epoch": 0.43125,
999
+ "grad_norm": 2.0302255153656006,
1000
+ "learning_rate": 6.070666666666667e-06,
1001
+ "loss": 0.0881,
1002
  "step": 3450
1003
  },
1004
  {
1005
+ "epoch": 0.434375,
1006
+ "grad_norm": 2.112396001815796,
1007
+ "learning_rate": 6.037333333333334e-06,
1008
+ "loss": 0.0828,
1009
  "step": 3475
1010
  },
1011
  {
1012
+ "epoch": 0.4375,
1013
+ "grad_norm": 2.513197183609009,
1014
+ "learning_rate": 6.004000000000001e-06,
1015
+ "loss": 0.0983,
1016
  "step": 3500
1017
  },
1018
  {
1019
+ "epoch": 0.440625,
1020
+ "grad_norm": 2.1429622173309326,
1021
+ "learning_rate": 5.970666666666667e-06,
1022
+ "loss": 0.0929,
1023
  "step": 3525
1024
  },
1025
  {
1026
+ "epoch": 0.44375,
1027
+ "grad_norm": 2.7300236225128174,
1028
+ "learning_rate": 5.937333333333334e-06,
1029
+ "loss": 0.0916,
1030
  "step": 3550
1031
  },
1032
  {
1033
+ "epoch": 0.446875,
1034
+ "grad_norm": 4.011541366577148,
1035
+ "learning_rate": 5.9040000000000006e-06,
1036
+ "loss": 0.1426,
1037
  "step": 3575
1038
  },
1039
  {
1040
+ "epoch": 0.45,
1041
+ "grad_norm": 3.1994545459747314,
1042
+ "learning_rate": 5.870666666666667e-06,
1043
+ "loss": 0.163,
1044
  "step": 3600
1045
  },
1046
  {
1047
+ "epoch": 0.453125,
1048
+ "grad_norm": 2.98388934135437,
1049
+ "learning_rate": 5.837333333333333e-06,
1050
+ "loss": 0.1568,
1051
  "step": 3625
1052
  },
1053
  {
1054
+ "epoch": 0.45625,
1055
+ "grad_norm": 2.4515798091888428,
1056
+ "learning_rate": 5.804000000000001e-06,
1057
+ "loss": 0.0937,
1058
  "step": 3650
1059
  },
1060
  {
1061
+ "epoch": 0.459375,
1062
+ "grad_norm": 2.0767834186553955,
1063
+ "learning_rate": 5.770666666666666e-06,
1064
+ "loss": 0.0861,
1065
  "step": 3675
1066
  },
1067
  {
1068
+ "epoch": 0.4625,
1069
+ "grad_norm": 2.601104974746704,
1070
+ "learning_rate": 5.7373333333333335e-06,
1071
+ "loss": 0.0917,
1072
  "step": 3700
1073
  },
1074
  {
1075
+ "epoch": 0.465625,
1076
+ "grad_norm": 2.593489408493042,
1077
+ "learning_rate": 5.704000000000001e-06,
1078
+ "loss": 0.1022,
1079
  "step": 3725
1080
  },
1081
  {
1082
+ "epoch": 0.46875,
1083
+ "grad_norm": 3.5832834243774414,
1084
+ "learning_rate": 5.670666666666668e-06,
1085
+ "loss": 0.1304,
1086
  "step": 3750
1087
  },
1088
  {
1089
+ "epoch": 0.471875,
1090
+ "grad_norm": 3.4403560161590576,
1091
+ "learning_rate": 5.637333333333334e-06,
1092
+ "loss": 0.1634,
1093
  "step": 3775
1094
  },
1095
  {
1096
+ "epoch": 0.475,
1097
+ "grad_norm": 3.6842737197875977,
1098
+ "learning_rate": 5.604000000000001e-06,
1099
+ "loss": 0.1683,
1100
  "step": 3800
1101
  },
1102
  {
1103
+ "epoch": 0.478125,
1104
+ "grad_norm": 3.8382315635681152,
1105
+ "learning_rate": 5.570666666666667e-06,
1106
+ "loss": 0.1538,
1107
  "step": 3825
1108
  },
1109
  {
1110
+ "epoch": 0.48125,
1111
+ "grad_norm": 4.207257270812988,
1112
+ "learning_rate": 5.537333333333334e-06,
1113
+ "loss": 0.165,
1114
  "step": 3850
1115
  },
1116
  {
1117
+ "epoch": 0.484375,
1118
+ "grad_norm": 2.4130444526672363,
1119
+ "learning_rate": 5.504e-06,
1120
+ "loss": 0.1558,
1121
  "step": 3875
1122
  },
1123
  {
1124
+ "epoch": 0.4875,
1125
+ "grad_norm": 2.3981151580810547,
1126
+ "learning_rate": 5.4706666666666674e-06,
1127
+ "loss": 0.1096,
1128
  "step": 3900
1129
  },
1130
  {
1131
+ "epoch": 0.490625,
1132
+ "grad_norm": 2.2837915420532227,
1133
+ "learning_rate": 5.437333333333333e-06,
1134
+ "loss": 0.0937,
1135
  "step": 3925
1136
  },
1137
  {
1138
+ "epoch": 0.49375,
1139
+ "grad_norm": 2.6647775173187256,
1140
+ "learning_rate": 5.404e-06,
1141
+ "loss": 0.0876,
1142
  "step": 3950
1143
  },
1144
  {
1145
+ "epoch": 0.496875,
1146
+ "grad_norm": 3.7677643299102783,
1147
+ "learning_rate": 5.3706666666666675e-06,
1148
+ "loss": 0.15,
1149
  "step": 3975
1150
  },
1151
  {
1152
+ "epoch": 0.5,
1153
+ "grad_norm": 3.542175769805908,
1154
+ "learning_rate": 5.337333333333333e-06,
1155
+ "loss": 0.1925,
1156
  "step": 4000
1157
  },
1158
  {
1159
+ "epoch": 0.5,
1160
+ "eval_loss": 0.25648659467697144,
1161
+ "eval_runtime": 150.6646,
1162
+ "eval_samples_per_second": 13.965,
1163
+ "eval_steps_per_second": 0.876,
1164
+ "eval_wer": 12.7372160418809,
1165
  "step": 4000
1166
  },
1167
  {
1168
+ "epoch": 0.503125,
1169
+ "grad_norm": 2.5672571659088135,
1170
+ "learning_rate": 5.304e-06,
1171
+ "loss": 0.1434,
1172
  "step": 4025
1173
  },
1174
  {
1175
+ "epoch": 0.50625,
1176
+ "grad_norm": 4.591808319091797,
1177
+ "learning_rate": 5.270666666666668e-06,
1178
+ "loss": 0.2075,
1179
  "step": 4050
1180
  },
1181
  {
1182
+ "epoch": 0.509375,
1183
+ "grad_norm": 3.485185146331787,
1184
+ "learning_rate": 5.237333333333334e-06,
1185
+ "loss": 0.1478,
1186
  "step": 4075
1187
  },
1188
  {
1189
+ "epoch": 0.5125,
1190
+ "grad_norm": 2.5995991230010986,
1191
+ "learning_rate": 5.2040000000000005e-06,
1192
+ "loss": 0.1383,
1193
  "step": 4100
1194
  },
1195
  {
1196
+ "epoch": 0.515625,
1197
+ "grad_norm": 2.4682819843292236,
1198
+ "learning_rate": 5.170666666666667e-06,
1199
+ "loss": 0.0959,
1200
  "step": 4125
1201
  },
1202
  {
1203
+ "epoch": 0.51875,
1204
+ "grad_norm": 2.436518669128418,
1205
+ "learning_rate": 5.137333333333334e-06,
1206
+ "loss": 0.0857,
1207
  "step": 4150
1208
  },
1209
  {
1210
+ "epoch": 0.521875,
1211
+ "grad_norm": 2.0344107151031494,
1212
+ "learning_rate": 5.104e-06,
1213
+ "loss": 0.0862,
1214
  "step": 4175
1215
  },
1216
  {
1217
+ "epoch": 0.525,
1218
+ "grad_norm": 1.6771937608718872,
1219
+ "learning_rate": 5.070666666666667e-06,
1220
+ "loss": 0.0808,
1221
  "step": 4200
1222
  },
1223
  {
1224
+ "epoch": 0.528125,
1225
+ "grad_norm": 1.7831439971923828,
1226
+ "learning_rate": 5.037333333333334e-06,
1227
+ "loss": 0.0872,
1228
  "step": 4225
1229
  },
1230
  {
1231
+ "epoch": 0.53125,
1232
+ "grad_norm": 2.228795051574707,
1233
+ "learning_rate": 5.004e-06,
1234
+ "loss": 0.0832,
1235
  "step": 4250
1236
  },
1237
  {
1238
+ "epoch": 0.534375,
1239
+ "grad_norm": 3.1402647495269775,
1240
+ "learning_rate": 4.970666666666667e-06,
1241
+ "loss": 0.0927,
1242
  "step": 4275
1243
  },
1244
  {
1245
+ "epoch": 0.5375,
1246
+ "grad_norm": 3.662506580352783,
1247
+ "learning_rate": 4.937333333333334e-06,
1248
+ "loss": 0.1477,
1249
  "step": 4300
1250
  },
1251
  {
1252
+ "epoch": 0.540625,
1253
+ "grad_norm": 2.865934371948242,
1254
+ "learning_rate": 4.904000000000001e-06,
1255
+ "loss": 0.1262,
1256
  "step": 4325
1257
  },
1258
  {
1259
+ "epoch": 0.54375,
1260
+ "grad_norm": 3.2233200073242188,
1261
+ "learning_rate": 4.870666666666667e-06,
1262
+ "loss": 0.1329,
1263
  "step": 4350
1264
  },
1265
  {
1266
+ "epoch": 0.546875,
1267
+ "grad_norm": 2.093703269958496,
1268
+ "learning_rate": 4.837333333333334e-06,
1269
+ "loss": 0.0795,
1270
  "step": 4375
1271
  },
1272
  {
1273
+ "epoch": 0.55,
1274
+ "grad_norm": 1.7601807117462158,
1275
+ "learning_rate": 4.804e-06,
1276
+ "loss": 0.0715,
1277
  "step": 4400
1278
  },
1279
  {
1280
+ "epoch": 0.553125,
1281
+ "grad_norm": 2.1606643199920654,
1282
+ "learning_rate": 4.770666666666667e-06,
1283
+ "loss": 0.0797,
1284
  "step": 4425
1285
  },
1286
  {
1287
+ "epoch": 0.55625,
1288
+ "grad_norm": 2.565343141555786,
1289
+ "learning_rate": 4.737333333333334e-06,
1290
+ "loss": 0.0883,
1291
  "step": 4450
1292
  },
1293
  {
1294
+ "epoch": 0.559375,
1295
+ "grad_norm": 2.062619924545288,
1296
+ "learning_rate": 4.704e-06,
1297
+ "loss": 0.0965,
1298
  "step": 4475
1299
  },
1300
  {
1301
+ "epoch": 0.5625,
1302
+ "grad_norm": 2.2219879627227783,
1303
+ "learning_rate": 4.6706666666666675e-06,
1304
+ "loss": 0.0891,
1305
  "step": 4500
1306
  },
1307
  {
1308
+ "epoch": 0.565625,
1309
+ "grad_norm": 2.857029676437378,
1310
+ "learning_rate": 4.637333333333334e-06,
1311
+ "loss": 0.1147,
1312
  "step": 4525
1313
  },
1314
  {
1315
+ "epoch": 0.56875,
1316
+ "grad_norm": 3.090247392654419,
1317
+ "learning_rate": 4.604e-06,
1318
+ "loss": 0.144,
1319
  "step": 4550
1320
  },
1321
  {
1322
+ "epoch": 0.571875,
1323
+ "grad_norm": 3.8906264305114746,
1324
+ "learning_rate": 4.570666666666667e-06,
1325
+ "loss": 0.1451,
1326
  "step": 4575
1327
  },
1328
  {
1329
+ "epoch": 0.575,
1330
+ "grad_norm": 3.7733590602874756,
1331
+ "learning_rate": 4.537333333333334e-06,
1332
+ "loss": 0.1475,
1333
  "step": 4600
1334
  },
1335
  {
1336
+ "epoch": 0.578125,
1337
+ "grad_norm": 3.379163980484009,
1338
+ "learning_rate": 4.504e-06,
1339
+ "loss": 0.1509,
1340
  "step": 4625
1341
  },
1342
  {
1343
+ "epoch": 0.58125,
1344
+ "grad_norm": 3.4210824966430664,
1345
+ "learning_rate": 4.470666666666667e-06,
1346
+ "loss": 0.1444,
1347
  "step": 4650
1348
  },
1349
  {
1350
+ "epoch": 0.584375,
1351
+ "grad_norm": 3.7809910774230957,
1352
+ "learning_rate": 4.437333333333333e-06,
1353
+ "loss": 0.1295,
1354
  "step": 4675
1355
  },
1356
  {
1357
+ "epoch": 0.5875,
1358
+ "grad_norm": 2.537574052810669,
1359
+ "learning_rate": 4.4040000000000005e-06,
1360
+ "loss": 0.1158,
1361
  "step": 4700
1362
  },
1363
  {
1364
+ "epoch": 0.590625,
1365
+ "grad_norm": 3.482285261154175,
1366
+ "learning_rate": 4.370666666666667e-06,
1367
+ "loss": 0.1249,
1368
  "step": 4725
1369
  },
1370
  {
1371
+ "epoch": 0.59375,
1372
+ "grad_norm": 3.0114011764526367,
1373
+ "learning_rate": 4.337333333333334e-06,
1374
+ "loss": 0.1238,
1375
  "step": 4750
1376
  },
1377
  {
1378
+ "epoch": 0.596875,
1379
+ "grad_norm": 2.117215394973755,
1380
+ "learning_rate": 4.304000000000001e-06,
1381
+ "loss": 0.0888,
1382
  "step": 4775
1383
  },
1384
  {
1385
+ "epoch": 0.6,
1386
+ "grad_norm": 2.0158379077911377,
1387
+ "learning_rate": 4.270666666666667e-06,
1388
+ "loss": 0.0972,
1389
  "step": 4800
1390
  },
1391
  {
1392
+ "epoch": 0.603125,
1393
+ "grad_norm": 2.5208640098571777,
1394
+ "learning_rate": 4.2373333333333335e-06,
1395
+ "loss": 0.0793,
1396
  "step": 4825
1397
  },
1398
  {
1399
+ "epoch": 0.60625,
1400
+ "grad_norm": 2.820002555847168,
1401
+ "learning_rate": 4.204e-06,
1402
+ "loss": 0.1035,
1403
  "step": 4850
1404
  },
1405
  {
1406
+ "epoch": 0.609375,
1407
+ "grad_norm": 3.1144282817840576,
1408
+ "learning_rate": 4.170666666666667e-06,
1409
+ "loss": 0.1128,
1410
  "step": 4875
1411
  },
1412
  {
1413
+ "epoch": 0.6125,
1414
+ "grad_norm": 3.1345527172088623,
1415
+ "learning_rate": 4.137333333333334e-06,
1416
+ "loss": 0.1217,
1417
  "step": 4900
1418
  },
1419
  {
1420
+ "epoch": 0.615625,
1421
+ "grad_norm": 2.2702696323394775,
1422
+ "learning_rate": 4.104e-06,
1423
+ "loss": 0.1061,
1424
  "step": 4925
1425
  },
1426
  {
1427
+ "epoch": 0.61875,
1428
+ "grad_norm": 2.714102268218994,
1429
+ "learning_rate": 4.072e-06,
1430
+ "loss": 0.0919,
1431
  "step": 4950
1432
  },
1433
  {
1434
+ "epoch": 0.621875,
1435
+ "grad_norm": 2.448854923248291,
1436
+ "learning_rate": 4.0386666666666666e-06,
1437
+ "loss": 0.0855,
1438
  "step": 4975
1439
  },
1440
  {
1441
+ "epoch": 0.625,
1442
+ "grad_norm": 2.9392127990722656,
1443
+ "learning_rate": 4.005333333333334e-06,
1444
+ "loss": 0.0818,
1445
  "step": 5000
1446
  },
1447
  {
1448
+ "epoch": 0.625,
1449
+ "eval_loss": 0.2562941014766693,
1450
+ "eval_runtime": 160.0125,
1451
+ "eval_samples_per_second": 13.149,
1452
+ "eval_steps_per_second": 0.825,
1453
+ "eval_wer": 11.942600729176405,
1454
  "step": 5000
1455
  },
1456
  {
1457
+ "epoch": 0.628125,
1458
+ "grad_norm": 2.4964210987091064,
1459
+ "learning_rate": 3.972e-06,
1460
+ "loss": 0.1203,
1461
+ "step": 5025
1462
+ },
1463
+ {
1464
+ "epoch": 0.63125,
1465
+ "grad_norm": 3.330078125,
1466
+ "learning_rate": 3.938666666666667e-06,
1467
+ "loss": 0.111,
1468
+ "step": 5050
1469
+ },
1470
+ {
1471
+ "epoch": 0.634375,
1472
+ "grad_norm": 3.6872191429138184,
1473
+ "learning_rate": 3.905333333333334e-06,
1474
+ "loss": 0.164,
1475
+ "step": 5075
1476
+ },
1477
+ {
1478
+ "epoch": 0.6375,
1479
+ "grad_norm": 3.728769063949585,
1480
+ "learning_rate": 3.872e-06,
1481
+ "loss": 0.1515,
1482
+ "step": 5100
1483
+ },
1484
+ {
1485
+ "epoch": 0.640625,
1486
+ "grad_norm": 3.4183156490325928,
1487
+ "learning_rate": 3.838666666666667e-06,
1488
+ "loss": 0.1334,
1489
+ "step": 5125
1490
+ },
1491
+ {
1492
+ "epoch": 0.64375,
1493
+ "grad_norm": 3.4580440521240234,
1494
+ "learning_rate": 3.8053333333333336e-06,
1495
+ "loss": 0.134,
1496
+ "step": 5150
1497
+ },
1498
+ {
1499
+ "epoch": 0.646875,
1500
+ "grad_norm": 2.2719855308532715,
1501
+ "learning_rate": 3.772e-06,
1502
+ "loss": 0.1088,
1503
+ "step": 5175
1504
+ },
1505
+ {
1506
+ "epoch": 0.65,
1507
+ "grad_norm": 2.3186910152435303,
1508
+ "learning_rate": 3.7386666666666673e-06,
1509
+ "loss": 0.0724,
1510
+ "step": 5200
1511
+ },
1512
+ {
1513
+ "epoch": 0.653125,
1514
+ "grad_norm": 1.8175565004348755,
1515
+ "learning_rate": 3.7053333333333337e-06,
1516
+ "loss": 0.0759,
1517
+ "step": 5225
1518
+ },
1519
+ {
1520
+ "epoch": 0.65625,
1521
+ "grad_norm": 2.0874826908111572,
1522
+ "learning_rate": 3.6720000000000006e-06,
1523
+ "loss": 0.0813,
1524
+ "step": 5250
1525
+ },
1526
+ {
1527
+ "epoch": 0.659375,
1528
+ "grad_norm": 1.9950120449066162,
1529
+ "learning_rate": 3.638666666666667e-06,
1530
+ "loss": 0.0824,
1531
+ "step": 5275
1532
+ },
1533
+ {
1534
+ "epoch": 0.6625,
1535
+ "grad_norm": 2.6349194049835205,
1536
+ "learning_rate": 3.6053333333333334e-06,
1537
+ "loss": 0.0835,
1538
+ "step": 5300
1539
+ },
1540
+ {
1541
+ "epoch": 0.665625,
1542
+ "grad_norm": 2.7667415142059326,
1543
+ "learning_rate": 3.5720000000000003e-06,
1544
+ "loss": 0.0823,
1545
+ "step": 5325
1546
+ },
1547
+ {
1548
+ "epoch": 0.66875,
1549
+ "grad_norm": 3.617748260498047,
1550
+ "learning_rate": 3.538666666666667e-06,
1551
+ "loss": 0.1077,
1552
+ "step": 5350
1553
+ },
1554
+ {
1555
+ "epoch": 0.671875,
1556
+ "grad_norm": 3.2603073120117188,
1557
+ "learning_rate": 3.5053333333333335e-06,
1558
+ "loss": 0.1268,
1559
+ "step": 5375
1560
+ },
1561
+ {
1562
+ "epoch": 0.675,
1563
+ "grad_norm": 2.9681355953216553,
1564
+ "learning_rate": 3.4720000000000004e-06,
1565
+ "loss": 0.1206,
1566
+ "step": 5400
1567
+ },
1568
+ {
1569
+ "epoch": 0.678125,
1570
+ "grad_norm": 4.156548500061035,
1571
+ "learning_rate": 3.438666666666667e-06,
1572
+ "loss": 0.1279,
1573
+ "step": 5425
1574
+ },
1575
+ {
1576
+ "epoch": 0.68125,
1577
+ "grad_norm": 3.2013888359069824,
1578
+ "learning_rate": 3.4053333333333337e-06,
1579
+ "loss": 0.1177,
1580
+ "step": 5450
1581
+ },
1582
+ {
1583
+ "epoch": 0.684375,
1584
+ "grad_norm": 3.299403190612793,
1585
+ "learning_rate": 3.372e-06,
1586
+ "loss": 0.0946,
1587
+ "step": 5475
1588
+ },
1589
+ {
1590
+ "epoch": 0.6875,
1591
+ "grad_norm": 2.39630389213562,
1592
+ "learning_rate": 3.338666666666667e-06,
1593
+ "loss": 0.0944,
1594
+ "step": 5500
1595
+ },
1596
+ {
1597
+ "epoch": 0.690625,
1598
+ "grad_norm": 3.7624928951263428,
1599
+ "learning_rate": 3.3053333333333338e-06,
1600
+ "loss": 0.1149,
1601
+ "step": 5525
1602
+ },
1603
+ {
1604
+ "epoch": 0.69375,
1605
+ "grad_norm": 3.3170886039733887,
1606
+ "learning_rate": 3.272e-06,
1607
+ "loss": 0.1373,
1608
+ "step": 5550
1609
+ },
1610
+ {
1611
+ "epoch": 0.696875,
1612
+ "grad_norm": 2.2296531200408936,
1613
+ "learning_rate": 3.238666666666667e-06,
1614
+ "loss": 0.1056,
1615
+ "step": 5575
1616
+ },
1617
+ {
1618
+ "epoch": 0.7,
1619
+ "grad_norm": 1.8995999097824097,
1620
+ "learning_rate": 3.2053333333333334e-06,
1621
+ "loss": 0.0724,
1622
+ "step": 5600
1623
+ },
1624
+ {
1625
+ "epoch": 0.703125,
1626
+ "grad_norm": 2.3782520294189453,
1627
+ "learning_rate": 3.172e-06,
1628
+ "loss": 0.0604,
1629
+ "step": 5625
1630
+ },
1631
+ {
1632
+ "epoch": 0.70625,
1633
+ "grad_norm": 2.2558810710906982,
1634
+ "learning_rate": 3.138666666666667e-06,
1635
+ "loss": 0.0581,
1636
+ "step": 5650
1637
+ },
1638
+ {
1639
+ "epoch": 0.709375,
1640
+ "grad_norm": 2.4040448665618896,
1641
+ "learning_rate": 3.1053333333333336e-06,
1642
+ "loss": 0.0713,
1643
+ "step": 5675
1644
+ },
1645
+ {
1646
+ "epoch": 0.7125,
1647
+ "grad_norm": 2.5696732997894287,
1648
+ "learning_rate": 3.072e-06,
1649
+ "loss": 0.0773,
1650
+ "step": 5700
1651
+ },
1652
+ {
1653
+ "epoch": 0.715625,
1654
+ "grad_norm": 2.237166404724121,
1655
+ "learning_rate": 3.038666666666667e-06,
1656
+ "loss": 0.0765,
1657
+ "step": 5725
1658
+ },
1659
+ {
1660
+ "epoch": 0.71875,
1661
+ "grad_norm": 1.8783671855926514,
1662
+ "learning_rate": 3.0053333333333332e-06,
1663
+ "loss": 0.0779,
1664
+ "step": 5750
1665
+ },
1666
+ {
1667
+ "epoch": 0.721875,
1668
+ "grad_norm": 2.096334457397461,
1669
+ "learning_rate": 2.9720000000000005e-06,
1670
+ "loss": 0.0751,
1671
+ "step": 5775
1672
+ },
1673
+ {
1674
+ "epoch": 0.725,
1675
+ "grad_norm": 2.0362164974212646,
1676
+ "learning_rate": 2.938666666666667e-06,
1677
+ "loss": 0.0711,
1678
+ "step": 5800
1679
+ },
1680
+ {
1681
+ "epoch": 0.728125,
1682
+ "grad_norm": 1.7136311531066895,
1683
+ "learning_rate": 2.9053333333333334e-06,
1684
+ "loss": 0.0635,
1685
+ "step": 5825
1686
+ },
1687
+ {
1688
+ "epoch": 0.73125,
1689
+ "grad_norm": 2.754848003387451,
1690
+ "learning_rate": 2.872e-06,
1691
+ "loss": 0.0698,
1692
+ "step": 5850
1693
+ },
1694
+ {
1695
+ "epoch": 0.734375,
1696
+ "grad_norm": 2.058065176010132,
1697
+ "learning_rate": 2.8386666666666666e-06,
1698
+ "loss": 0.0741,
1699
+ "step": 5875
1700
+ },
1701
+ {
1702
+ "epoch": 0.7375,
1703
+ "grad_norm": 3.0389583110809326,
1704
+ "learning_rate": 2.805333333333334e-06,
1705
+ "loss": 0.0938,
1706
+ "step": 5900
1707
+ },
1708
+ {
1709
+ "epoch": 0.740625,
1710
+ "grad_norm": 3.4811720848083496,
1711
+ "learning_rate": 2.7720000000000003e-06,
1712
+ "loss": 0.1387,
1713
+ "step": 5925
1714
+ },
1715
+ {
1716
+ "epoch": 0.74375,
1717
+ "grad_norm": 3.2388477325439453,
1718
+ "learning_rate": 2.7386666666666667e-06,
1719
+ "loss": 0.1283,
1720
+ "step": 5950
1721
+ },
1722
+ {
1723
+ "epoch": 0.746875,
1724
+ "grad_norm": 3.083925247192383,
1725
+ "learning_rate": 2.7053333333333336e-06,
1726
+ "loss": 0.1073,
1727
+ "step": 5975
1728
+ },
1729
+ {
1730
+ "epoch": 0.75,
1731
+ "grad_norm": 2.6847918033599854,
1732
+ "learning_rate": 2.672e-06,
1733
+ "loss": 0.1038,
1734
+ "step": 6000
1735
+ },
1736
+ {
1737
+ "epoch": 0.75,
1738
+ "eval_loss": 0.23902159929275513,
1739
+ "eval_runtime": 158.0693,
1740
+ "eval_samples_per_second": 13.311,
1741
+ "eval_steps_per_second": 0.835,
1742
+ "eval_wer": 11.07319809292325,
1743
+ "step": 6000
1744
+ },
1745
+ {
1746
+ "epoch": 0.753125,
1747
+ "grad_norm": 2.7315189838409424,
1748
+ "learning_rate": 2.6386666666666673e-06,
1749
+ "loss": 0.0987,
1750
+ "step": 6025
1751
+ },
1752
+ {
1753
+ "epoch": 0.75625,
1754
+ "grad_norm": 2.3389735221862793,
1755
+ "learning_rate": 2.6053333333333337e-06,
1756
+ "loss": 0.0858,
1757
+ "step": 6050
1758
+ },
1759
+ {
1760
+ "epoch": 0.759375,
1761
+ "grad_norm": 1.982534646987915,
1762
+ "learning_rate": 2.572e-06,
1763
+ "loss": 0.0764,
1764
+ "step": 6075
1765
+ },
1766
+ {
1767
+ "epoch": 0.7625,
1768
+ "grad_norm": 1.9040074348449707,
1769
+ "learning_rate": 2.538666666666667e-06,
1770
+ "loss": 0.0731,
1771
+ "step": 6100
1772
+ },
1773
+ {
1774
+ "epoch": 0.765625,
1775
+ "grad_norm": 2.654710054397583,
1776
+ "learning_rate": 2.5053333333333334e-06,
1777
+ "loss": 0.0758,
1778
+ "step": 6125
1779
+ },
1780
+ {
1781
+ "epoch": 0.76875,
1782
+ "grad_norm": 2.6400296688079834,
1783
+ "learning_rate": 2.4720000000000002e-06,
1784
+ "loss": 0.0824,
1785
+ "step": 6150
1786
+ },
1787
+ {
1788
+ "epoch": 0.771875,
1789
+ "grad_norm": 7.269197463989258,
1790
+ "learning_rate": 2.438666666666667e-06,
1791
+ "loss": 0.0822,
1792
+ "step": 6175
1793
+ },
1794
+ {
1795
+ "epoch": 0.775,
1796
+ "grad_norm": 2.363656520843506,
1797
+ "learning_rate": 2.4053333333333335e-06,
1798
+ "loss": 0.0818,
1799
+ "step": 6200
1800
+ },
1801
+ {
1802
+ "epoch": 0.778125,
1803
+ "grad_norm": 2.4660115242004395,
1804
+ "learning_rate": 2.3720000000000003e-06,
1805
+ "loss": 0.0768,
1806
+ "step": 6225
1807
+ },
1808
+ {
1809
+ "epoch": 0.78125,
1810
+ "grad_norm": 3.3116371631622314,
1811
+ "learning_rate": 2.3386666666666668e-06,
1812
+ "loss": 0.0783,
1813
+ "step": 6250
1814
+ },
1815
+ {
1816
+ "epoch": 0.784375,
1817
+ "grad_norm": 2.595853090286255,
1818
+ "learning_rate": 2.3053333333333336e-06,
1819
+ "loss": 0.0899,
1820
+ "step": 6275
1821
+ },
1822
+ {
1823
+ "epoch": 0.7875,
1824
+ "grad_norm": 2.709597587585449,
1825
+ "learning_rate": 2.2720000000000004e-06,
1826
+ "loss": 0.0953,
1827
+ "step": 6300
1828
+ },
1829
+ {
1830
+ "epoch": 0.790625,
1831
+ "grad_norm": 2.4446637630462646,
1832
+ "learning_rate": 2.238666666666667e-06,
1833
+ "loss": 0.1249,
1834
+ "step": 6325
1835
+ },
1836
+ {
1837
+ "epoch": 0.79375,
1838
+ "grad_norm": 3.4412341117858887,
1839
+ "learning_rate": 2.2053333333333333e-06,
1840
+ "loss": 0.1171,
1841
+ "step": 6350
1842
+ },
1843
+ {
1844
+ "epoch": 0.796875,
1845
+ "grad_norm": 2.2719008922576904,
1846
+ "learning_rate": 2.172e-06,
1847
+ "loss": 0.1065,
1848
+ "step": 6375
1849
+ },
1850
+ {
1851
+ "epoch": 0.8,
1852
+ "grad_norm": 1.9873290061950684,
1853
+ "learning_rate": 2.138666666666667e-06,
1854
+ "loss": 0.0872,
1855
+ "step": 6400
1856
+ },
1857
+ {
1858
+ "epoch": 0.803125,
1859
+ "grad_norm": 2.487403392791748,
1860
+ "learning_rate": 2.1053333333333334e-06,
1861
+ "loss": 0.0765,
1862
+ "step": 6425
1863
+ },
1864
+ {
1865
+ "epoch": 0.80625,
1866
+ "grad_norm": 2.4424736499786377,
1867
+ "learning_rate": 2.0720000000000002e-06,
1868
+ "loss": 0.0736,
1869
+ "step": 6450
1870
+ },
1871
+ {
1872
+ "epoch": 0.809375,
1873
+ "grad_norm": 3.1507577896118164,
1874
+ "learning_rate": 2.0386666666666667e-06,
1875
+ "loss": 0.1064,
1876
+ "step": 6475
1877
+ },
1878
+ {
1879
+ "epoch": 0.8125,
1880
+ "grad_norm": 2.6285648345947266,
1881
+ "learning_rate": 2.0053333333333335e-06,
1882
+ "loss": 0.0993,
1883
+ "step": 6500
1884
+ },
1885
+ {
1886
+ "epoch": 0.815625,
1887
+ "grad_norm": 4.1934967041015625,
1888
+ "learning_rate": 1.972e-06,
1889
+ "loss": 0.1299,
1890
+ "step": 6525
1891
+ },
1892
+ {
1893
+ "epoch": 0.81875,
1894
+ "grad_norm": 3.031852960586548,
1895
+ "learning_rate": 1.9386666666666668e-06,
1896
+ "loss": 0.1195,
1897
+ "step": 6550
1898
+ },
1899
+ {
1900
+ "epoch": 0.821875,
1901
+ "grad_norm": 2.9288837909698486,
1902
+ "learning_rate": 1.9053333333333334e-06,
1903
+ "loss": 0.1197,
1904
+ "step": 6575
1905
+ },
1906
+ {
1907
+ "epoch": 0.825,
1908
+ "grad_norm": 2.890054225921631,
1909
+ "learning_rate": 1.8720000000000002e-06,
1910
+ "loss": 0.1127,
1911
+ "step": 6600
1912
+ },
1913
+ {
1914
+ "epoch": 0.828125,
1915
+ "grad_norm": 3.130406618118286,
1916
+ "learning_rate": 1.8386666666666669e-06,
1917
+ "loss": 0.1155,
1918
+ "step": 6625
1919
+ },
1920
+ {
1921
+ "epoch": 0.83125,
1922
+ "grad_norm": 2.7169485092163086,
1923
+ "learning_rate": 1.8053333333333333e-06,
1924
+ "loss": 0.1291,
1925
+ "step": 6650
1926
+ },
1927
+ {
1928
+ "epoch": 0.834375,
1929
+ "grad_norm": 2.7390034198760986,
1930
+ "learning_rate": 1.7720000000000001e-06,
1931
+ "loss": 0.1097,
1932
+ "step": 6675
1933
+ },
1934
+ {
1935
+ "epoch": 0.8375,
1936
+ "grad_norm": 2.161604166030884,
1937
+ "learning_rate": 1.7386666666666668e-06,
1938
+ "loss": 0.1022,
1939
+ "step": 6700
1940
+ },
1941
+ {
1942
+ "epoch": 0.840625,
1943
+ "grad_norm": 2.210451126098633,
1944
+ "learning_rate": 1.7053333333333336e-06,
1945
+ "loss": 0.0779,
1946
+ "step": 6725
1947
+ },
1948
+ {
1949
+ "epoch": 0.84375,
1950
+ "grad_norm": 2.426438808441162,
1951
+ "learning_rate": 1.672e-06,
1952
+ "loss": 0.0728,
1953
+ "step": 6750
1954
+ },
1955
+ {
1956
+ "epoch": 0.846875,
1957
+ "grad_norm": 2.8744237422943115,
1958
+ "learning_rate": 1.6386666666666667e-06,
1959
+ "loss": 0.0859,
1960
+ "step": 6775
1961
+ },
1962
+ {
1963
+ "epoch": 0.85,
1964
+ "grad_norm": 2.8165483474731445,
1965
+ "learning_rate": 1.6053333333333335e-06,
1966
+ "loss": 0.1496,
1967
+ "step": 6800
1968
+ },
1969
+ {
1970
+ "epoch": 0.853125,
1971
+ "grad_norm": 4.0077738761901855,
1972
+ "learning_rate": 1.5720000000000002e-06,
1973
+ "loss": 0.1343,
1974
+ "step": 6825
1975
+ },
1976
+ {
1977
+ "epoch": 0.85625,
1978
+ "grad_norm": 3.8011586666107178,
1979
+ "learning_rate": 1.538666666666667e-06,
1980
+ "loss": 0.1397,
1981
+ "step": 6850
1982
+ },
1983
+ {
1984
+ "epoch": 0.859375,
1985
+ "grad_norm": 2.7379047870635986,
1986
+ "learning_rate": 1.5053333333333334e-06,
1987
+ "loss": 0.1262,
1988
+ "step": 6875
1989
+ },
1990
+ {
1991
+ "epoch": 0.8625,
1992
+ "grad_norm": 3.250950574874878,
1993
+ "learning_rate": 1.472e-06,
1994
+ "loss": 0.1188,
1995
+ "step": 6900
1996
+ },
1997
+ {
1998
+ "epoch": 0.865625,
1999
+ "grad_norm": 2.782945156097412,
2000
+ "learning_rate": 1.438666666666667e-06,
2001
+ "loss": 0.1103,
2002
+ "step": 6925
2003
+ },
2004
+ {
2005
+ "epoch": 0.86875,
2006
+ "grad_norm": 3.08154034614563,
2007
+ "learning_rate": 1.4053333333333335e-06,
2008
+ "loss": 0.1147,
2009
+ "step": 6950
2010
+ },
2011
+ {
2012
+ "epoch": 0.871875,
2013
+ "grad_norm": 3.5768070220947266,
2014
+ "learning_rate": 1.372e-06,
2015
+ "loss": 0.1332,
2016
+ "step": 6975
2017
+ },
2018
+ {
2019
+ "epoch": 0.875,
2020
+ "grad_norm": 3.155341863632202,
2021
+ "learning_rate": 1.3386666666666668e-06,
2022
+ "loss": 0.1282,
2023
+ "step": 7000
2024
+ },
2025
+ {
2026
+ "epoch": 0.875,
2027
+ "eval_loss": 0.23438745737075806,
2028
+ "eval_runtime": 154.8314,
2029
+ "eval_samples_per_second": 13.589,
2030
+ "eval_steps_per_second": 0.853,
2031
+ "eval_wer": 11.391044218005048,
2032
+ "step": 7000
2033
+ },
2034
+ {
2035
+ "epoch": 0.878125,
2036
+ "grad_norm": 11.062019348144531,
2037
+ "learning_rate": 1.308e-06,
2038
+ "loss": 0.2406,
2039
+ "step": 7025
2040
+ },
2041
+ {
2042
+ "epoch": 0.88125,
2043
+ "grad_norm": 4.648179531097412,
2044
+ "learning_rate": 1.2746666666666669e-06,
2045
+ "loss": 0.3469,
2046
+ "step": 7050
2047
+ },
2048
+ {
2049
+ "epoch": 0.884375,
2050
+ "grad_norm": 4.388245105743408,
2051
+ "learning_rate": 1.2413333333333335e-06,
2052
+ "loss": 0.3421,
2053
+ "step": 7075
2054
+ },
2055
+ {
2056
+ "epoch": 0.8875,
2057
+ "grad_norm": 4.806427478790283,
2058
+ "learning_rate": 1.2080000000000001e-06,
2059
+ "loss": 0.2847,
2060
+ "step": 7100
2061
+ },
2062
+ {
2063
+ "epoch": 0.890625,
2064
+ "grad_norm": 3.0818049907684326,
2065
+ "learning_rate": 1.1746666666666668e-06,
2066
+ "loss": 0.1671,
2067
+ "step": 7125
2068
+ },
2069
+ {
2070
+ "epoch": 0.89375,
2071
+ "grad_norm": 4.117819309234619,
2072
+ "learning_rate": 1.1413333333333334e-06,
2073
+ "loss": 0.1313,
2074
+ "step": 7150
2075
+ },
2076
+ {
2077
+ "epoch": 0.896875,
2078
+ "grad_norm": 2.8558835983276367,
2079
+ "learning_rate": 1.108e-06,
2080
+ "loss": 0.1177,
2081
+ "step": 7175
2082
+ },
2083
+ {
2084
+ "epoch": 0.9,
2085
+ "grad_norm": 3.0425021648406982,
2086
+ "learning_rate": 1.0746666666666669e-06,
2087
+ "loss": 0.0911,
2088
+ "step": 7200
2089
+ },
2090
+ {
2091
+ "epoch": 0.903125,
2092
+ "grad_norm": 2.6587588787078857,
2093
+ "learning_rate": 1.0413333333333333e-06,
2094
+ "loss": 0.0898,
2095
+ "step": 7225
2096
+ },
2097
+ {
2098
+ "epoch": 0.90625,
2099
+ "grad_norm": 1.7572664022445679,
2100
+ "learning_rate": 1.0080000000000001e-06,
2101
+ "loss": 0.0922,
2102
+ "step": 7250
2103
+ },
2104
+ {
2105
+ "epoch": 0.909375,
2106
+ "grad_norm": 2.00393009185791,
2107
+ "learning_rate": 9.746666666666668e-07,
2108
+ "loss": 0.0753,
2109
+ "step": 7275
2110
+ },
2111
+ {
2112
+ "epoch": 0.9125,
2113
+ "grad_norm": 1.845981478691101,
2114
+ "learning_rate": 9.413333333333334e-07,
2115
+ "loss": 0.0628,
2116
+ "step": 7300
2117
+ },
2118
+ {
2119
+ "epoch": 0.915625,
2120
+ "grad_norm": 2.008112907409668,
2121
+ "learning_rate": 9.080000000000001e-07,
2122
+ "loss": 0.0696,
2123
+ "step": 7325
2124
+ },
2125
+ {
2126
+ "epoch": 0.91875,
2127
+ "grad_norm": 2.837357759475708,
2128
+ "learning_rate": 8.746666666666668e-07,
2129
+ "loss": 0.0897,
2130
+ "step": 7350
2131
+ },
2132
+ {
2133
+ "epoch": 0.921875,
2134
+ "grad_norm": 2.4842417240142822,
2135
+ "learning_rate": 8.413333333333334e-07,
2136
+ "loss": 0.1227,
2137
+ "step": 7375
2138
+ },
2139
+ {
2140
+ "epoch": 0.925,
2141
+ "grad_norm": 2.7866716384887695,
2142
+ "learning_rate": 8.08e-07,
2143
+ "loss": 0.1012,
2144
+ "step": 7400
2145
+ },
2146
+ {
2147
+ "epoch": 0.928125,
2148
+ "grad_norm": 2.1826930046081543,
2149
+ "learning_rate": 7.746666666666668e-07,
2150
+ "loss": 0.1141,
2151
+ "step": 7425
2152
+ },
2153
+ {
2154
+ "epoch": 0.93125,
2155
+ "grad_norm": 2.014090061187744,
2156
+ "learning_rate": 7.413333333333333e-07,
2157
+ "loss": 0.0754,
2158
+ "step": 7450
2159
+ },
2160
+ {
2161
+ "epoch": 0.934375,
2162
+ "grad_norm": 2.1539175510406494,
2163
+ "learning_rate": 7.08e-07,
2164
+ "loss": 0.0736,
2165
+ "step": 7475
2166
+ },
2167
+ {
2168
+ "epoch": 0.9375,
2169
+ "grad_norm": 2.712541341781616,
2170
+ "learning_rate": 6.746666666666667e-07,
2171
+ "loss": 0.0684,
2172
+ "step": 7500
2173
+ },
2174
+ {
2175
+ "epoch": 0.940625,
2176
+ "grad_norm": 3.281242847442627,
2177
+ "learning_rate": 6.413333333333334e-07,
2178
+ "loss": 0.1414,
2179
+ "step": 7525
2180
+ },
2181
+ {
2182
+ "epoch": 0.94375,
2183
+ "grad_norm": 4.088025093078613,
2184
+ "learning_rate": 6.08e-07,
2185
+ "loss": 0.1895,
2186
+ "step": 7550
2187
+ },
2188
+ {
2189
+ "epoch": 0.946875,
2190
+ "grad_norm": 4.144560813903809,
2191
+ "learning_rate": 5.746666666666667e-07,
2192
+ "loss": 0.222,
2193
+ "step": 7575
2194
+ },
2195
+ {
2196
+ "epoch": 0.95,
2197
+ "grad_norm": 1.8468823432922363,
2198
+ "learning_rate": 5.413333333333334e-07,
2199
+ "loss": 0.1349,
2200
+ "step": 7600
2201
+ },
2202
+ {
2203
+ "epoch": 0.953125,
2204
+ "grad_norm": 2.5354621410369873,
2205
+ "learning_rate": 5.08e-07,
2206
+ "loss": 0.0872,
2207
+ "step": 7625
2208
+ },
2209
+ {
2210
+ "epoch": 0.95625,
2211
+ "grad_norm": 1.83882737159729,
2212
+ "learning_rate": 4.746666666666667e-07,
2213
+ "loss": 0.0725,
2214
+ "step": 7650
2215
+ },
2216
+ {
2217
+ "epoch": 0.959375,
2218
+ "grad_norm": 3.42556095123291,
2219
+ "learning_rate": 4.413333333333333e-07,
2220
+ "loss": 0.0988,
2221
+ "step": 7675
2222
+ },
2223
+ {
2224
+ "epoch": 0.9625,
2225
+ "grad_norm": 2.682558059692383,
2226
+ "learning_rate": 4.0800000000000005e-07,
2227
+ "loss": 0.1166,
2228
+ "step": 7700
2229
+ },
2230
+ {
2231
+ "epoch": 0.965625,
2232
+ "grad_norm": 3.2471797466278076,
2233
+ "learning_rate": 3.7466666666666674e-07,
2234
+ "loss": 0.1257,
2235
+ "step": 7725
2236
+ },
2237
+ {
2238
+ "epoch": 0.96875,
2239
+ "grad_norm": 2.4202020168304443,
2240
+ "learning_rate": 3.4133333333333337e-07,
2241
+ "loss": 0.1114,
2242
+ "step": 7750
2243
+ },
2244
+ {
2245
+ "epoch": 0.971875,
2246
+ "grad_norm": 2.8282711505889893,
2247
+ "learning_rate": 3.0800000000000006e-07,
2248
+ "loss": 0.0811,
2249
+ "step": 7775
2250
+ },
2251
+ {
2252
+ "epoch": 0.975,
2253
+ "grad_norm": 4.20676326751709,
2254
+ "learning_rate": 2.746666666666667e-07,
2255
+ "loss": 0.104,
2256
+ "step": 7800
2257
+ },
2258
+ {
2259
+ "epoch": 0.978125,
2260
+ "grad_norm": 4.955998420715332,
2261
+ "learning_rate": 2.413333333333333e-07,
2262
+ "loss": 0.2773,
2263
+ "step": 7825
2264
+ },
2265
+ {
2266
+ "epoch": 0.98125,
2267
+ "grad_norm": 2.0168468952178955,
2268
+ "learning_rate": 2.08e-07,
2269
+ "loss": 0.1105,
2270
+ "step": 7850
2271
+ },
2272
+ {
2273
+ "epoch": 0.984375,
2274
+ "grad_norm": 1.6335862874984741,
2275
+ "learning_rate": 1.7466666666666667e-07,
2276
+ "loss": 0.0808,
2277
+ "step": 7875
2278
+ },
2279
+ {
2280
+ "epoch": 0.9875,
2281
+ "grad_norm": 2.269954204559326,
2282
+ "learning_rate": 1.4133333333333333e-07,
2283
+ "loss": 0.0786,
2284
+ "step": 7900
2285
+ },
2286
+ {
2287
+ "epoch": 0.990625,
2288
+ "grad_norm": 2.0813560485839844,
2289
+ "learning_rate": 1.0800000000000001e-07,
2290
+ "loss": 0.0801,
2291
+ "step": 7925
2292
+ },
2293
+ {
2294
+ "epoch": 0.99375,
2295
+ "grad_norm": 1.6093230247497559,
2296
+ "learning_rate": 7.466666666666667e-08,
2297
+ "loss": 0.0687,
2298
+ "step": 7950
2299
+ },
2300
+ {
2301
+ "epoch": 0.996875,
2302
+ "grad_norm": 1.730695366859436,
2303
+ "learning_rate": 4.133333333333334e-08,
2304
+ "loss": 0.0814,
2305
+ "step": 7975
2306
+ },
2307
+ {
2308
+ "epoch": 1.0,
2309
+ "grad_norm": 3.418311595916748,
2310
+ "learning_rate": 8e-09,
2311
+ "loss": 0.0959,
2312
+ "step": 8000
2313
+ },
2314
+ {
2315
+ "epoch": 1.0,
2316
+ "eval_loss": 0.1835634410381317,
2317
+ "eval_runtime": 154.4338,
2318
+ "eval_samples_per_second": 13.624,
2319
+ "eval_steps_per_second": 0.855,
2320
+ "eval_wer": 10.886229784051602,
2321
+ "step": 8000
2322
+ },
2323
+ {
2324
+ "epoch": 1.0,
2325
+ "step": 8000,
2326
+ "total_flos": 7.387786248192e+19,
2327
+ "train_loss": 0.17036041705310345,
2328
+ "train_runtime": 11036.9074,
2329
+ "train_samples_per_second": 23.195,
2330
+ "train_steps_per_second": 0.725
2331
  }
2332
  ],
2333
+ "logging_steps": 25,
2334
+ "max_steps": 8000,
2335
+ "num_input_tokens_seen": 0,
2336
  "num_train_epochs": 9223372036854775807,
2337
+ "save_steps": 1000,
2338
+ "stateful_callbacks": {
2339
+ "TrainerControl": {
2340
+ "args": {
2341
+ "should_epoch_stop": false,
2342
+ "should_evaluate": false,
2343
+ "should_log": false,
2344
+ "should_save": true,
2345
+ "should_training_stop": true
2346
+ },
2347
+ "attributes": {}
2348
+ }
2349
+ },
2350
+ "total_flos": 7.387786248192e+19,
2351
+ "train_batch_size": 32,
2352
  "trial_name": null,
2353
  "trial_params": null
2354
  }
wandb/run-20250212_152709-lejyafmi/files/output.log CHANGED
@@ -1612,3 +1612,171 @@ It seems you are trying to upload a large folder at once. This might take some t
1612
  /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
1613
  warnings.warn(
1614
  run-lejyafmi.wandb: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4.62M/4.62M [00:01<00:00, 3.10MB/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1612
  /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
1613
  warnings.warn(
1614
  run-lejyafmi.wandb: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4.62M/4.62M [00:01<00:00, 3.10MB/s]
1615
+ ***** train metrics *****
1616
+ epoch = 1.0
1617
+ total_flos = 68804121093GF
1618
+ train_loss = 0.1704
1619
+ train_runtime = 3:03:56.90
1620
+ train_samples_per_second = 23.195
1621
+ train_steps_per_second = 0.725
1622
+ 02/12/2025 18:34:14 - INFO - __main__ - *** Evaluate ***
1623
+ [INFO|trainer.py:4176] 2025-02-12 18:34:14,390 >>
1624
+ ***** Running Evaluation *****
1625
+ [INFO|trainer.py:4180] 2025-02-12 18:34:14,390 >> Num examples: Unknown
1626
+ [INFO|trainer.py:4181] 2025-02-12 18:34:14,390 >> Batch size = 16
1627
+ [INFO|trainer_utils.py:837] 2025-02-12 18:34:21,770 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.
1628
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:21,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1629
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:23,093 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1630
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:24,451 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1631
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:25,719 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1632
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:26,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1633
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:28,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1634
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:29,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1635
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:30,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1636
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:31,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1637
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:32,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1638
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:33,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1639
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:34,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1640
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:35,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1641
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:36,700 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1642
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:37,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1643
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:38,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1644
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:39,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1645
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:40,537 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1646
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:41,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1647
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:42,380 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1648
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:43,354 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1649
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:44,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1650
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:45,314 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1651
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:46,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1652
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:47,281 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1653
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:48,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1654
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:49,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1655
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:50,458 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1656
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:51,370 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1657
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:52,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1658
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:53,396 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1659
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:54,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1660
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:55,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1661
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:56,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1662
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:57,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1663
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:58,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1664
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:34:59,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1665
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:00,681 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1666
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:01,717 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1667
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:03,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1668
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:04,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1669
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:05,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1670
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:06,930 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1671
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:07,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1672
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:08,880 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1673
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:09,968 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1674
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:10,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1675
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:11,921 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1676
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1677
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:14,074 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1678
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:15,109 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1679
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:16,148 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1680
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:17,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1681
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:18,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1682
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:19,129 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1683
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:20,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1684
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:21,190 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1685
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:22,160 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1686
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:23,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1687
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:24,153 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1688
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:25,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1689
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:26,238 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1690
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:27,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1691
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:28,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1692
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:29,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1693
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:30,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1694
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:31,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1695
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:32,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1696
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:33,287 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1697
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:34,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1698
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:35,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1699
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:36,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1700
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:37,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1701
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:38,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1702
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:39,530 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1703
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:40,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1704
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:41,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1705
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:42,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1706
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:43,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1707
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:44,509 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1708
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:45,496 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1709
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:46,461 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1710
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:47,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1711
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:48,535 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1712
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:49,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1713
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:50,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1714
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:51,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1715
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:52,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1716
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:53,784 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1717
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:54,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1718
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:55,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1719
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:56,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1720
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:57,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1721
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:35:58,926 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1722
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:00,028 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1723
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:01,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1724
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:02,083 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1725
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:03,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1726
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:04,102 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1727
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:05,141 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1728
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:06,171 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1729
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:07,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1730
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:08,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1731
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:09,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1732
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:10,412 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1733
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:11,429 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1734
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:12,466 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1735
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:13,562 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1736
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:14,560 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1737
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:15,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1738
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:16,563 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1739
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:17,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1740
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:18,568 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1741
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:19,501 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1742
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:20,504 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1743
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:21,464 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1744
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:22,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1745
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:23,555 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1746
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:24,557 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1747
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:25,554 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1748
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:26,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1749
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:27,543 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1750
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:28,586 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1751
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:29,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1752
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:30,585 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1753
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:31,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1754
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:32,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1755
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:33,545 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1756
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:34,531 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1757
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:35,552 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1758
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:36,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1759
+ [INFO|generation_whisper.py:1844] 2025-02-12 18:36:37,437 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
1760
+ ***** eval metrics *****
1761
+ epoch = 1.0
1762
+ eval_loss = 0.1836
1763
+ eval_runtime = 0:02:31.38
1764
+ eval_samples_per_second = 13.899
1765
+ eval_steps_per_second = 0.872
1766
+ eval_wer = 10.8862
1767
+ [INFO|trainer.py:3860] 2025-02-12 18:36:45,773 >> Saving model checkpoint to ./
1768
+ [INFO|configuration_utils.py:423] 2025-02-12 18:36:45,774 >> Configuration saved in ./config.json
1769
+ [INFO|configuration_utils.py:906] 2025-02-12 18:36:45,775 >> Configuration saved in ./generation_config.json
1770
+ [INFO|modeling_utils.py:3040] 2025-02-12 18:36:47,949 >> Model weights saved in ./model.safetensors
1771
+ [INFO|feature_extraction_utils.py:437] 2025-02-12 18:36:47,950 >> Feature extractor saved in ./preprocessor_config.json
1772
+ It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
1773
+ 02/12/2025 18:36:51 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
1774
+ /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
1775
+ warnings.warn(
1776
+ /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
1777
+ warnings.warn(
1778
+ /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
1779
+ warnings.warn(
1780
+ /home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
1781
+ warnings.warn(
1782
+ run-lejyafmi.wandb: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4.69M/4.69M [00:01<00:00, 3.23MB/s]
wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d45ae7708451f569a26149093624dc7494943def519a728a6ef4093ad80dd382
3
- size 4620288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218ae98ab28234be327e4ea9293f7b5d13580cf3d80509614063d5a55716991b
3
+ size 4685824