lucio commited on
Commit
ae323bb
1 Parent(s): f6eced0

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.22402019798755646,
4
- "eval_runtime": 183.2761,
5
  "eval_samples": 2744,
6
- "eval_samples_per_second": 14.972,
7
- "eval_steps_per_second": 1.871,
8
- "eval_wer": 0.3693335163075797,
9
- "train_loss": 1.3463122907597969,
10
- "train_runtime": 52469.8271,
11
  "train_samples": 6034,
12
- "train_samples_per_second": 11.5,
13
- "train_steps_per_second": 0.358
14
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.20362737774848938,
4
+ "eval_runtime": 146.0593,
5
  "eval_samples": 2744,
6
+ "eval_samples_per_second": 18.787,
7
+ "eval_steps_per_second": 2.348,
8
+ "eval_wer": 0.2976980458560249,
9
+ "train_loss": 1.1418190615227881,
10
+ "train_runtime": 52190.9896,
11
  "train_samples": 6034,
12
+ "train_samples_per_second": 11.561,
13
+ "train_steps_per_second": 0.36
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.22402019798755646,
4
- "eval_runtime": 183.2761,
5
  "eval_samples": 2744,
6
- "eval_samples_per_second": 14.972,
7
- "eval_steps_per_second": 1.871,
8
- "eval_wer": 0.3693335163075797
9
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.20362737774848938,
4
+ "eval_runtime": 146.0593,
5
  "eval_samples": 2744,
6
+ "eval_samples_per_second": 18.787,
7
+ "eval_steps_per_second": 2.348,
8
+ "eval_wer": 0.2976980458560249
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b788ce940efe52c4f0f9aa554f4ab658aa42cb915d854e26431e3bad8f99a128
3
  size 1262079473
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c54b4b3156392864c0d5f52b356d2399e3c35afdc09545fb8d7a43903bbf346
3
  size 1262079473
runs/Jan29_15-39-53_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643470896.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.1361704.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80d7ecb3d1b98da6e3ae2dde889ca081824e2057f51531ad44102f97d012c02
3
- size 45635
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7842c98503b0971b00ccff163f0452728d919c4b4f236bb1bf9d06cd292c7f
3
+ size 46475
runs/Jan29_15-39-53_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643523236.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.1361704.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e364e0e5753b1ddc6b335c4cbede724f6457ff77ecde9d8d707f03fef567612f
3
+ size 364
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 1.3463122907597969,
4
- "train_runtime": 52469.8271,
5
  "train_samples": 6034,
6
- "train_samples_per_second": 11.5,
7
- "train_steps_per_second": 0.358
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 1.1418190615227881,
4
+ "train_runtime": 52190.9896,
5
  "train_samples": 6034,
6
+ "train_samples_per_second": 11.561,
7
+ "train_steps_per_second": 0.36
8
  }
trainer_state.json CHANGED
@@ -9,1478 +9,1478 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.53,
12
- "learning_rate": 2.0000000000000003e-06,
13
- "loss": 15.5103,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.06,
18
- "learning_rate": 3.980000000000001e-06,
19
- "loss": 10.25,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.59,
24
- "learning_rate": 5.98e-06,
25
- "loss": 5.588,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.13,
30
- "learning_rate": 7.980000000000002e-06,
31
- "loss": 4.6387,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.66,
36
- "learning_rate": 9.980000000000001e-06,
37
- "loss": 4.1169,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.66,
42
- "eval_loss": 4.01455020904541,
43
- "eval_runtime": 162.7143,
44
- "eval_samples_per_second": 16.864,
45
- "eval_steps_per_second": 2.108,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 3.19,
51
- "learning_rate": 1.198e-05,
52
- "loss": 3.796,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 3.72,
57
- "learning_rate": 1.398e-05,
58
- "loss": 3.4906,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 4.25,
63
- "learning_rate": 1.5980000000000003e-05,
64
- "loss": 3.3596,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 4.78,
69
- "learning_rate": 1.798e-05,
70
- "loss": 3.2609,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 5.32,
75
- "learning_rate": 1.9980000000000002e-05,
76
- "loss": 3.2512,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.32,
81
- "eval_loss": 3.234210252761841,
82
- "eval_runtime": 160.5159,
83
- "eval_samples_per_second": 17.095,
84
- "eval_steps_per_second": 2.137,
85
- "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 5.85,
90
- "learning_rate": 2.1980000000000003e-05,
91
- "loss": 3.1927,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 6.38,
96
- "learning_rate": 2.3980000000000004e-05,
97
- "loss": 3.1722,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 6.91,
102
- "learning_rate": 2.5980000000000002e-05,
103
- "loss": 3.1008,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 7.45,
108
- "learning_rate": 2.7980000000000003e-05,
109
- "loss": 2.9719,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 7.97,
114
- "learning_rate": 2.9980000000000004e-05,
115
- "loss": 2.5435,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 7.97,
120
- "eval_loss": 1.8155322074890137,
121
- "eval_runtime": 146.5621,
122
- "eval_samples_per_second": 18.722,
123
- "eval_steps_per_second": 2.34,
124
- "eval_wer": 1.0286354695576598,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 8.51,
129
- "learning_rate": 3.198e-05,
130
- "loss": 2.1032,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 9.04,
135
- "learning_rate": 3.398e-05,
136
- "loss": 1.8413,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 9.57,
141
- "learning_rate": 3.5980000000000004e-05,
142
- "loss": 1.7079,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 10.11,
147
- "learning_rate": 3.7980000000000006e-05,
148
- "loss": 1.6319,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 10.64,
153
- "learning_rate": 3.998000000000001e-05,
154
- "loss": 1.5575,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 10.64,
159
- "eval_loss": 0.6345986127853394,
160
- "eval_runtime": 149.5836,
161
- "eval_samples_per_second": 18.344,
162
- "eval_steps_per_second": 2.293,
163
- "eval_wer": 0.705777411829285,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 11.17,
168
- "learning_rate": 3.976428571428572e-05,
169
- "loss": 1.5137,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 11.7,
174
- "learning_rate": 3.952857142857143e-05,
175
- "loss": 1.468,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.23,
180
- "learning_rate": 3.9290476190476196e-05,
181
- "loss": 1.4546,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 12.76,
186
- "learning_rate": 3.905238095238096e-05,
187
- "loss": 1.4071,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 13.3,
192
- "learning_rate": 3.881428571428572e-05,
193
- "loss": 1.3979,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 13.3,
198
- "eval_loss": 0.48850002884864807,
199
- "eval_runtime": 146.6274,
200
- "eval_samples_per_second": 18.714,
201
- "eval_steps_per_second": 2.339,
202
- "eval_wer": 0.6320387905402315,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 13.83,
207
- "learning_rate": 3.857857142857143e-05,
208
- "loss": 1.3532,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 14.36,
213
- "learning_rate": 3.834047619047619e-05,
214
- "loss": 1.3544,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 14.89,
219
- "learning_rate": 3.810238095238096e-05,
220
- "loss": 1.335,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 15.42,
225
- "learning_rate": 3.7864285714285715e-05,
226
- "loss": 1.3402,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 15.95,
231
- "learning_rate": 3.762619047619048e-05,
232
- "loss": 1.2874,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 15.95,
237
- "eval_loss": 0.4270566701889038,
238
- "eval_runtime": 150.2121,
239
- "eval_samples_per_second": 18.268,
240
- "eval_steps_per_second": 2.283,
241
- "eval_wer": 0.6088468048122226,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 16.49,
246
- "learning_rate": 3.7388095238095244e-05,
247
- "loss": 1.2897,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 17.02,
252
- "learning_rate": 3.715e-05,
253
- "loss": 1.2965,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 17.55,
258
- "learning_rate": 3.6911904761904766e-05,
259
- "loss": 1.2752,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 18.08,
264
- "learning_rate": 3.6673809523809524e-05,
265
- "loss": 1.2672,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 18.61,
270
- "learning_rate": 3.643571428571429e-05,
271
- "loss": 1.2383,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 18.61,
276
- "eval_loss": 0.3889118432998657,
277
- "eval_runtime": 149.102,
278
- "eval_samples_per_second": 18.404,
279
- "eval_steps_per_second": 2.3,
280
- "eval_wer": 0.586889895247244,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 19.15,
285
- "learning_rate": 3.619761904761905e-05,
286
- "loss": 1.2479,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 19.68,
291
- "learning_rate": 3.595952380952381e-05,
292
- "loss": 1.2281,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 20.21,
297
- "learning_rate": 3.5721428571428575e-05,
298
- "loss": 1.2223,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 20.74,
303
- "learning_rate": 3.548333333333333e-05,
304
- "loss": 1.2006,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 21.28,
309
- "learning_rate": 3.52452380952381e-05,
310
- "loss": 1.2054,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 21.28,
315
- "eval_loss": 0.3609465956687927,
316
- "eval_runtime": 157.7853,
317
- "eval_samples_per_second": 17.391,
318
- "eval_steps_per_second": 2.174,
319
- "eval_wer": 0.5792964640226889,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 21.81,
324
- "learning_rate": 3.500714285714286e-05,
325
- "loss": 1.1953,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 22.34,
330
- "learning_rate": 3.476904761904762e-05,
331
- "loss": 1.2022,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 22.87,
336
- "learning_rate": 3.4530952380952384e-05,
337
- "loss": 1.1841,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 23.4,
342
- "learning_rate": 3.429285714285715e-05,
343
- "loss": 1.1941,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 23.93,
348
- "learning_rate": 3.4054761904761906e-05,
349
- "loss": 1.1866,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 23.93,
354
- "eval_loss": 0.3450409471988678,
355
- "eval_runtime": 147.8034,
356
- "eval_samples_per_second": 18.565,
357
- "eval_steps_per_second": 2.321,
358
- "eval_wer": 0.5513471478889347,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 24.47,
363
- "learning_rate": 3.381666666666667e-05,
364
- "loss": 1.1688,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 25.0,
369
- "learning_rate": 3.357857142857143e-05,
370
- "loss": 1.1772,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 25.53,
375
- "learning_rate": 3.3342857142857146e-05,
376
- "loss": 1.1641,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 26.06,
381
- "learning_rate": 3.3104761904761904e-05,
382
- "loss": 1.1567,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 26.59,
387
- "learning_rate": 3.286666666666667e-05,
388
- "loss": 1.1332,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 26.59,
393
- "eval_loss": 0.32143130898475647,
394
- "eval_runtime": 151.6356,
395
- "eval_samples_per_second": 18.096,
396
- "eval_steps_per_second": 2.262,
397
- "eval_wer": 0.5378985407803851,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 27.13,
402
- "learning_rate": 3.262857142857143e-05,
403
- "loss": 1.1436,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 27.66,
408
- "learning_rate": 3.23904761904762e-05,
409
- "loss": 1.1234,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 28.19,
414
- "learning_rate": 3.215476190476191e-05,
415
- "loss": 1.1273,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 28.72,
420
- "learning_rate": 3.191666666666667e-05,
421
- "loss": 1.1237,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 29.25,
426
- "learning_rate": 3.167857142857143e-05,
427
- "loss": 1.135,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 29.25,
432
- "eval_loss": 0.3122180998325348,
433
- "eval_runtime": 157.9044,
434
- "eval_samples_per_second": 17.378,
435
- "eval_steps_per_second": 2.172,
436
- "eval_wer": 0.538401719957916,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 29.78,
441
- "learning_rate": 3.1440476190476194e-05,
442
- "loss": 1.1127,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 30.32,
447
- "learning_rate": 3.120238095238095e-05,
448
- "loss": 1.1288,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 30.85,
453
- "learning_rate": 3.096428571428572e-05,
454
- "loss": 1.11,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 31.38,
459
- "learning_rate": 3.072619047619048e-05,
460
- "loss": 1.1098,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 31.91,
465
- "learning_rate": 3.048809523809524e-05,
466
- "loss": 1.0992,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 31.91,
471
- "eval_loss": 0.29478520154953003,
472
- "eval_runtime": 149.1488,
473
- "eval_samples_per_second": 18.398,
474
- "eval_steps_per_second": 2.3,
475
- "eval_wer": 0.5078450208133205,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 32.45,
480
- "learning_rate": 3.025e-05,
481
- "loss": 1.1043,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 32.97,
486
- "learning_rate": 3.0011904761904765e-05,
487
- "loss": 1.0963,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 33.51,
492
- "learning_rate": 2.9773809523809526e-05,
493
- "loss": 1.1059,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 34.04,
498
- "learning_rate": 2.953571428571429e-05,
499
- "loss": 1.095,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 34.57,
504
- "learning_rate": 2.9297619047619048e-05,
505
- "loss": 1.0707,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 34.57,
510
- "eval_loss": 0.2927539348602295,
511
- "eval_runtime": 153.3467,
512
- "eval_samples_per_second": 17.894,
513
- "eval_steps_per_second": 2.237,
514
- "eval_wer": 0.5127853254654408,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 35.11,
519
- "learning_rate": 2.9059523809523812e-05,
520
- "loss": 1.0887,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 35.64,
525
- "learning_rate": 2.8821428571428574e-05,
526
- "loss": 1.0763,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 36.17,
531
- "learning_rate": 2.8583333333333335e-05,
532
- "loss": 1.0805,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 36.7,
537
- "learning_rate": 2.83452380952381e-05,
538
- "loss": 1.0675,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 37.23,
543
- "learning_rate": 2.8107142857142857e-05,
544
- "loss": 1.0754,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 37.23,
549
- "eval_loss": 0.28569138050079346,
550
- "eval_runtime": 156.3448,
551
- "eval_samples_per_second": 17.551,
552
- "eval_steps_per_second": 2.194,
553
- "eval_wer": 0.5016696399981703,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 37.76,
558
- "learning_rate": 2.786904761904762e-05,
559
- "loss": 1.0695,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 38.3,
564
- "learning_rate": 2.7630952380952383e-05,
565
- "loss": 1.0747,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 38.83,
570
- "learning_rate": 2.7392857142857147e-05,
571
- "loss": 1.0551,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 39.36,
576
- "learning_rate": 2.715714285714286e-05,
577
- "loss": 1.0511,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 39.89,
582
- "learning_rate": 2.6919047619047622e-05,
583
- "loss": 1.0461,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 39.89,
588
- "eval_loss": 0.27913743257522583,
589
- "eval_runtime": 146.6834,
590
- "eval_samples_per_second": 18.707,
591
- "eval_steps_per_second": 2.338,
592
- "eval_wer": 0.509949224646631,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 40.42,
597
- "learning_rate": 2.6680952380952387e-05,
598
- "loss": 1.0547,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 40.95,
603
- "learning_rate": 2.6442857142857144e-05,
604
- "loss": 1.0443,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 41.49,
609
- "learning_rate": 2.6204761904761905e-05,
610
- "loss": 1.0582,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 42.02,
615
- "learning_rate": 2.596666666666667e-05,
616
- "loss": 1.0588,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 42.55,
621
- "learning_rate": 2.572857142857143e-05,
622
- "loss": 1.0328,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 42.55,
627
- "eval_loss": 0.2728850841522217,
628
- "eval_runtime": 150.8954,
629
- "eval_samples_per_second": 18.185,
630
- "eval_steps_per_second": 2.273,
631
- "eval_wer": 0.511961941356754,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 43.08,
636
- "learning_rate": 2.5490476190476196e-05,
637
- "loss": 1.0464,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 43.61,
642
- "learning_rate": 2.5252380952380953e-05,
643
- "loss": 1.0289,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 44.15,
648
- "learning_rate": 2.5014285714285714e-05,
649
- "loss": 1.026,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 44.68,
654
- "learning_rate": 2.477619047619048e-05,
655
- "loss": 1.0288,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 45.21,
660
- "learning_rate": 2.453809523809524e-05,
661
- "loss": 1.0201,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 45.21,
666
- "eval_loss": 0.2654191255569458,
667
- "eval_runtime": 150.9056,
668
- "eval_samples_per_second": 18.184,
669
- "eval_steps_per_second": 2.273,
670
- "eval_wer": 0.47198206852385527,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 45.74,
675
- "learning_rate": 2.4300000000000005e-05,
676
- "loss": 1.0167,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 46.28,
681
- "learning_rate": 2.4061904761904762e-05,
682
- "loss": 1.0183,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 46.81,
687
- "learning_rate": 2.3823809523809523e-05,
688
- "loss": 1.0093,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 47.34,
693
- "learning_rate": 2.3585714285714288e-05,
694
- "loss": 1.0236,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 47.87,
699
- "learning_rate": 2.334761904761905e-05,
700
- "loss": 1.0035,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 47.87,
705
- "eval_loss": 0.26225191354751587,
706
- "eval_runtime": 146.6476,
707
- "eval_samples_per_second": 18.712,
708
- "eval_steps_per_second": 2.339,
709
- "eval_wer": 0.4658981748318924,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 48.4,
714
- "learning_rate": 2.3109523809523813e-05,
715
- "loss": 1.0059,
716
  "step": 9100
717
  },
718
  {
719
  "epoch": 48.93,
720
- "learning_rate": 2.287142857142857e-05,
721
- "loss": 1.0031,
722
  "step": 9200
723
  },
724
  {
725
  "epoch": 49.47,
726
- "learning_rate": 2.2633333333333332e-05,
727
- "loss": 1.0092,
728
  "step": 9300
729
  },
730
  {
731
  "epoch": 50.0,
732
- "learning_rate": 2.2395238095238097e-05,
733
- "loss": 0.9908,
734
  "step": 9400
735
  },
736
  {
737
  "epoch": 50.53,
738
- "learning_rate": 2.215952380952381e-05,
739
- "loss": 1.0069,
740
  "step": 9500
741
  },
742
  {
743
  "epoch": 50.53,
744
- "eval_loss": 0.25685444474220276,
745
- "eval_runtime": 152.7548,
746
- "eval_samples_per_second": 17.963,
747
- "eval_steps_per_second": 2.245,
748
- "eval_wer": 0.45931110196239877,
749
  "step": 9500
750
  },
751
  {
752
  "epoch": 51.06,
753
- "learning_rate": 2.1921428571428572e-05,
754
- "loss": 1.0056,
755
  "step": 9600
756
  },
757
  {
758
  "epoch": 51.59,
759
- "learning_rate": 2.1683333333333336e-05,
760
- "loss": 0.9943,
761
  "step": 9700
762
  },
763
  {
764
  "epoch": 52.13,
765
- "learning_rate": 2.1445238095238097e-05,
766
- "loss": 0.9985,
767
  "step": 9800
768
  },
769
  {
770
  "epoch": 52.66,
771
- "learning_rate": 2.120714285714286e-05,
772
- "loss": 0.9858,
773
  "step": 9900
774
  },
775
  {
776
  "epoch": 53.19,
777
- "learning_rate": 2.096904761904762e-05,
778
- "loss": 0.9998,
779
  "step": 10000
780
  },
781
  {
782
  "epoch": 53.19,
783
- "eval_loss": 0.251886785030365,
784
- "eval_runtime": 152.7789,
785
- "eval_samples_per_second": 17.961,
786
- "eval_steps_per_second": 2.245,
787
- "eval_wer": 0.44051049814738574,
788
  "step": 10000
789
  },
790
  {
791
  "epoch": 53.72,
792
- "learning_rate": 2.0730952380952384e-05,
793
- "loss": 0.9829,
794
  "step": 10100
795
  },
796
  {
797
  "epoch": 54.25,
798
- "learning_rate": 2.0492857142857145e-05,
799
- "loss": 0.9944,
800
  "step": 10200
801
  },
802
  {
803
  "epoch": 54.78,
804
- "learning_rate": 2.025476190476191e-05,
805
- "loss": 0.9777,
806
  "step": 10300
807
  },
808
  {
809
  "epoch": 55.32,
810
- "learning_rate": 2.0019047619047624e-05,
811
- "loss": 0.9828,
812
  "step": 10400
813
  },
814
  {
815
  "epoch": 55.85,
816
- "learning_rate": 1.978095238095238e-05,
817
- "loss": 0.9762,
818
  "step": 10500
819
  },
820
  {
821
  "epoch": 55.85,
822
- "eval_loss": 0.2504919767379761,
823
- "eval_runtime": 153.619,
824
- "eval_samples_per_second": 17.862,
825
- "eval_steps_per_second": 2.233,
826
- "eval_wer": 0.45876217922327434,
827
  "step": 10500
828
  },
829
  {
830
  "epoch": 56.38,
831
- "learning_rate": 1.9542857142857143e-05,
832
- "loss": 0.9843,
833
  "step": 10600
834
  },
835
  {
836
  "epoch": 56.91,
837
- "learning_rate": 1.9304761904761907e-05,
838
- "loss": 0.9686,
839
  "step": 10700
840
  },
841
  {
842
  "epoch": 57.45,
843
- "learning_rate": 1.9066666666666668e-05,
844
- "loss": 0.9776,
845
  "step": 10800
846
  },
847
  {
848
  "epoch": 57.97,
849
- "learning_rate": 1.882857142857143e-05,
850
- "loss": 0.9743,
851
  "step": 10900
852
  },
853
  {
854
  "epoch": 58.51,
855
- "learning_rate": 1.859047619047619e-05,
856
- "loss": 0.9755,
857
  "step": 11000
858
  },
859
  {
860
  "epoch": 58.51,
861
- "eval_loss": 0.2478867620229721,
862
- "eval_runtime": 157.921,
863
- "eval_samples_per_second": 17.376,
864
- "eval_steps_per_second": 2.172,
865
- "eval_wer": 0.4563835140204016,
866
  "step": 11000
867
  },
868
  {
869
  "epoch": 59.04,
870
- "learning_rate": 1.8352380952380955e-05,
871
- "loss": 0.9598,
872
  "step": 11100
873
  },
874
  {
875
  "epoch": 59.57,
876
- "learning_rate": 1.8114285714285716e-05,
877
- "loss": 0.9621,
878
  "step": 11200
879
  },
880
  {
881
  "epoch": 60.11,
882
- "learning_rate": 1.7876190476190477e-05,
883
- "loss": 0.9711,
884
  "step": 11300
885
  },
886
  {
887
  "epoch": 60.64,
888
- "learning_rate": 1.7638095238095238e-05,
889
- "loss": 0.9584,
890
  "step": 11400
891
  },
892
  {
893
  "epoch": 61.17,
894
- "learning_rate": 1.7400000000000003e-05,
895
- "loss": 0.9624,
896
  "step": 11500
897
  },
898
  {
899
  "epoch": 61.17,
900
- "eval_loss": 0.24601028859615326,
901
- "eval_runtime": 151.7998,
902
- "eval_samples_per_second": 18.076,
903
- "eval_steps_per_second": 2.26,
904
- "eval_wer": 0.42976076117286494,
905
  "step": 11500
906
  },
907
  {
908
  "epoch": 61.7,
909
- "learning_rate": 1.7161904761904764e-05,
910
- "loss": 0.9489,
911
  "step": 11600
912
  },
913
  {
914
  "epoch": 62.23,
915
- "learning_rate": 1.6923809523809525e-05,
916
- "loss": 0.9563,
917
  "step": 11700
918
  },
919
  {
920
  "epoch": 62.76,
921
- "learning_rate": 1.6685714285714286e-05,
922
- "loss": 0.9483,
923
  "step": 11800
924
  },
925
  {
926
  "epoch": 63.3,
927
- "learning_rate": 1.644761904761905e-05,
928
- "loss": 0.9703,
929
  "step": 11900
930
  },
931
  {
932
  "epoch": 63.83,
933
- "learning_rate": 1.6209523809523812e-05,
934
- "loss": 0.9494,
935
  "step": 12000
936
  },
937
  {
938
  "epoch": 63.83,
939
- "eval_loss": 0.2402362823486328,
940
- "eval_runtime": 160.0311,
941
- "eval_samples_per_second": 17.147,
942
- "eval_steps_per_second": 2.143,
943
- "eval_wer": 0.41823338365125107,
944
  "step": 12000
945
  },
946
  {
947
  "epoch": 64.36,
948
- "learning_rate": 1.5971428571428573e-05,
949
- "loss": 0.9528,
950
  "step": 12100
951
  },
952
  {
953
  "epoch": 64.89,
954
- "learning_rate": 1.5733333333333334e-05,
955
- "loss": 0.9422,
956
  "step": 12200
957
  },
958
  {
959
  "epoch": 65.42,
960
- "learning_rate": 1.5497619047619048e-05,
961
- "loss": 0.9481,
962
  "step": 12300
963
  },
964
  {
965
  "epoch": 65.95,
966
- "learning_rate": 1.5259523809523812e-05,
967
- "loss": 0.9464,
968
  "step": 12400
969
  },
970
  {
971
  "epoch": 66.49,
972
- "learning_rate": 1.5021428571428574e-05,
973
- "loss": 0.948,
974
  "step": 12500
975
  },
976
  {
977
  "epoch": 66.49,
978
- "eval_loss": 0.2412397861480713,
979
- "eval_runtime": 153.0299,
980
- "eval_samples_per_second": 17.931,
981
- "eval_steps_per_second": 2.241,
982
- "eval_wer": 0.4211609715932482,
983
  "step": 12500
984
  },
985
  {
986
  "epoch": 67.02,
987
- "learning_rate": 1.4783333333333335e-05,
988
- "loss": 0.9524,
989
  "step": 12600
990
  },
991
  {
992
  "epoch": 67.55,
993
- "learning_rate": 1.4545238095238097e-05,
994
- "loss": 0.9389,
995
  "step": 12700
996
  },
997
  {
998
  "epoch": 68.08,
999
- "learning_rate": 1.4307142857142857e-05,
1000
- "loss": 0.936,
1001
  "step": 12800
1002
  },
1003
  {
1004
  "epoch": 68.61,
1005
- "learning_rate": 1.406904761904762e-05,
1006
- "loss": 0.9284,
1007
  "step": 12900
1008
  },
1009
  {
1010
  "epoch": 69.15,
1011
- "learning_rate": 1.3830952380952383e-05,
1012
- "loss": 0.9312,
1013
  "step": 13000
1014
  },
1015
  {
1016
  "epoch": 69.15,
1017
- "eval_loss": 0.2352364957332611,
1018
- "eval_runtime": 155.983,
1019
- "eval_samples_per_second": 17.592,
1020
- "eval_steps_per_second": 2.199,
1021
- "eval_wer": 0.39700837107177167,
1022
  "step": 13000
1023
  },
1024
  {
1025
  "epoch": 69.68,
1026
- "learning_rate": 1.3592857142857144e-05,
1027
- "loss": 0.9291,
1028
  "step": 13100
1029
  },
1030
  {
1031
  "epoch": 70.21,
1032
- "learning_rate": 1.3354761904761906e-05,
1033
- "loss": 0.9319,
1034
  "step": 13200
1035
  },
1036
  {
1037
  "epoch": 70.74,
1038
- "learning_rate": 1.311666666666667e-05,
1039
- "loss": 0.9168,
1040
  "step": 13300
1041
  },
1042
  {
1043
  "epoch": 71.28,
1044
- "learning_rate": 1.2878571428571429e-05,
1045
- "loss": 0.9359,
1046
  "step": 13400
1047
  },
1048
  {
1049
  "epoch": 71.81,
1050
- "learning_rate": 1.2640476190476192e-05,
1051
- "loss": 0.9172,
1052
  "step": 13500
1053
  },
1054
  {
1055
  "epoch": 71.81,
1056
- "eval_loss": 0.23573005199432373,
1057
- "eval_runtime": 152.9039,
1058
- "eval_samples_per_second": 17.946,
1059
- "eval_steps_per_second": 2.243,
1060
- "eval_wer": 0.3926169891587759,
1061
  "step": 13500
1062
  },
1063
  {
1064
  "epoch": 72.34,
1065
- "learning_rate": 1.2402380952380953e-05,
1066
- "loss": 0.938,
1067
  "step": 13600
1068
  },
1069
  {
1070
  "epoch": 72.87,
1071
- "learning_rate": 1.2164285714285715e-05,
1072
- "loss": 0.9146,
1073
  "step": 13700
1074
  },
1075
  {
1076
  "epoch": 73.4,
1077
- "learning_rate": 1.1926190476190478e-05,
1078
- "loss": 0.9253,
1079
  "step": 13800
1080
  },
1081
  {
1082
  "epoch": 73.93,
1083
- "learning_rate": 1.1688095238095238e-05,
1084
- "loss": 0.9192,
1085
  "step": 13900
1086
  },
1087
  {
1088
  "epoch": 74.47,
1089
- "learning_rate": 1.145e-05,
1090
- "loss": 0.9101,
1091
  "step": 14000
1092
  },
1093
  {
1094
  "epoch": 74.47,
1095
- "eval_loss": 0.23052847385406494,
1096
- "eval_runtime": 144.6085,
1097
- "eval_samples_per_second": 18.975,
1098
- "eval_steps_per_second": 2.372,
1099
- "eval_wer": 0.39046704176387176,
1100
  "step": 14000
1101
  },
1102
  {
1103
  "epoch": 75.0,
1104
- "learning_rate": 1.1211904761904763e-05,
1105
- "loss": 0.9204,
1106
  "step": 14100
1107
  },
1108
  {
1109
  "epoch": 75.53,
1110
- "learning_rate": 1.0973809523809524e-05,
1111
- "loss": 0.9244,
1112
  "step": 14200
1113
  },
1114
  {
1115
  "epoch": 76.06,
1116
- "learning_rate": 1.0735714285714287e-05,
1117
- "loss": 0.9251,
1118
  "step": 14300
1119
  },
1120
  {
1121
  "epoch": 76.59,
1122
- "learning_rate": 1.0497619047619048e-05,
1123
- "loss": 0.9149,
1124
  "step": 14400
1125
  },
1126
  {
1127
  "epoch": 77.13,
1128
- "learning_rate": 1.0259523809523811e-05,
1129
- "loss": 0.9177,
1130
  "step": 14500
1131
  },
1132
  {
1133
  "epoch": 77.13,
1134
- "eval_loss": 0.23065772652626038,
1135
- "eval_runtime": 154.0486,
1136
- "eval_samples_per_second": 17.813,
1137
- "eval_steps_per_second": 2.227,
1138
- "eval_wer": 0.3837884817711907,
1139
  "step": 14500
1140
  },
1141
  {
1142
  "epoch": 77.66,
1143
- "learning_rate": 1.0021428571428572e-05,
1144
- "loss": 0.902,
1145
  "step": 14600
1146
  },
1147
  {
1148
  "epoch": 78.19,
1149
- "learning_rate": 9.783333333333335e-06,
1150
- "loss": 0.9127,
1151
  "step": 14700
1152
  },
1153
  {
1154
  "epoch": 78.72,
1155
- "learning_rate": 9.545238095238096e-06,
1156
- "loss": 0.9078,
1157
  "step": 14800
1158
  },
1159
  {
1160
  "epoch": 79.25,
1161
- "learning_rate": 9.307142857142857e-06,
1162
- "loss": 0.9066,
1163
  "step": 14900
1164
  },
1165
  {
1166
  "epoch": 79.78,
1167
- "learning_rate": 9.06904761904762e-06,
1168
- "loss": 0.9083,
1169
  "step": 15000
1170
  },
1171
  {
1172
  "epoch": 79.78,
1173
- "eval_loss": 0.2312641590833664,
1174
- "eval_runtime": 159.1357,
1175
- "eval_samples_per_second": 17.243,
1176
- "eval_steps_per_second": 2.155,
1177
- "eval_wer": 0.3799917661589131,
1178
  "step": 15000
1179
  },
1180
  {
1181
  "epoch": 80.32,
1182
- "learning_rate": 8.830952380952381e-06,
1183
- "loss": 0.9123,
1184
  "step": 15100
1185
  },
1186
  {
1187
  "epoch": 80.85,
1188
- "learning_rate": 8.592857142857144e-06,
1189
- "loss": 0.902,
1190
  "step": 15200
1191
  },
1192
  {
1193
  "epoch": 81.38,
1194
- "learning_rate": 8.354761904761905e-06,
1195
- "loss": 0.9114,
1196
  "step": 15300
1197
  },
1198
  {
1199
  "epoch": 81.91,
1200
- "learning_rate": 8.116666666666666e-06,
1201
- "loss": 0.8936,
1202
  "step": 15400
1203
  },
1204
  {
1205
  "epoch": 82.45,
1206
- "learning_rate": 7.878571428571429e-06,
1207
- "loss": 0.9068,
1208
  "step": 15500
1209
  },
1210
  {
1211
  "epoch": 82.45,
1212
- "eval_loss": 0.22753386199474335,
1213
- "eval_runtime": 149.5268,
1214
- "eval_samples_per_second": 18.351,
1215
- "eval_steps_per_second": 2.294,
1216
- "eval_wer": 0.3742280773981062,
1217
  "step": 15500
1218
  },
1219
  {
1220
  "epoch": 82.97,
1221
- "learning_rate": 7.640476190476192e-06,
1222
- "loss": 0.8974,
1223
  "step": 15600
1224
  },
1225
  {
1226
  "epoch": 83.51,
1227
- "learning_rate": 7.402380952380953e-06,
1228
- "loss": 0.9025,
1229
  "step": 15700
1230
  },
1231
  {
1232
  "epoch": 84.04,
1233
- "learning_rate": 7.164285714285715e-06,
1234
- "loss": 0.9008,
1235
  "step": 15800
1236
  },
1237
  {
1238
  "epoch": 84.57,
1239
- "learning_rate": 6.926190476190476e-06,
1240
- "loss": 0.8975,
1241
  "step": 15900
1242
  },
1243
  {
1244
  "epoch": 85.11,
1245
- "learning_rate": 6.688095238095239e-06,
1246
- "loss": 0.9087,
1247
  "step": 16000
1248
  },
1249
  {
1250
  "epoch": 85.11,
1251
- "eval_loss": 0.22831058502197266,
1252
- "eval_runtime": 151.3152,
1253
- "eval_samples_per_second": 18.134,
1254
- "eval_steps_per_second": 2.267,
1255
- "eval_wer": 0.3746855130140433,
1256
  "step": 16000
1257
  },
1258
  {
1259
  "epoch": 85.64,
1260
- "learning_rate": 6.450000000000001e-06,
1261
- "loss": 0.8852,
1262
  "step": 16100
1263
  },
1264
  {
1265
  "epoch": 86.17,
1266
- "learning_rate": 6.211904761904762e-06,
1267
- "loss": 0.8967,
1268
  "step": 16200
1269
  },
1270
  {
1271
  "epoch": 86.7,
1272
- "learning_rate": 5.973809523809524e-06,
1273
- "loss": 0.8862,
1274
  "step": 16300
1275
  },
1276
  {
1277
  "epoch": 87.23,
1278
- "learning_rate": 5.735714285714287e-06,
1279
- "loss": 0.9042,
1280
  "step": 16400
1281
  },
1282
  {
1283
  "epoch": 87.76,
1284
- "learning_rate": 5.497619047619048e-06,
1285
- "loss": 0.8838,
1286
  "step": 16500
1287
  },
1288
  {
1289
  "epoch": 87.76,
1290
- "eval_loss": 0.2285744994878769,
1291
- "eval_runtime": 157.9121,
1292
- "eval_samples_per_second": 17.377,
1293
- "eval_steps_per_second": 2.172,
1294
- "eval_wer": 0.37770458807922785,
1295
  "step": 16500
1296
  },
1297
  {
1298
  "epoch": 88.3,
1299
- "learning_rate": 5.25952380952381e-06,
1300
- "loss": 0.8952,
1301
  "step": 16600
1302
  },
1303
  {
1304
  "epoch": 88.83,
1305
- "learning_rate": 5.021428571428572e-06,
1306
- "loss": 0.8893,
1307
  "step": 16700
1308
  },
1309
  {
1310
  "epoch": 89.36,
1311
- "learning_rate": 4.783333333333334e-06,
1312
- "loss": 0.8933,
1313
  "step": 16800
1314
  },
1315
  {
1316
  "epoch": 89.89,
1317
- "learning_rate": 4.545238095238095e-06,
1318
- "loss": 0.8796,
1319
  "step": 16900
1320
  },
1321
  {
1322
  "epoch": 90.42,
1323
- "learning_rate": 4.307142857142858e-06,
1324
- "loss": 0.8868,
1325
  "step": 17000
1326
  },
1327
  {
1328
  "epoch": 90.42,
1329
- "eval_loss": 0.22693119943141937,
1330
- "eval_runtime": 145.6646,
1331
- "eval_samples_per_second": 18.838,
1332
- "eval_steps_per_second": 2.355,
1333
- "eval_wer": 0.37216961712638946,
1334
  "step": 17000
1335
  },
1336
  {
1337
  "epoch": 90.95,
1338
- "learning_rate": 4.069047619047619e-06,
1339
- "loss": 0.8852,
1340
  "step": 17100
1341
  },
1342
  {
1343
  "epoch": 91.49,
1344
- "learning_rate": 3.830952380952382e-06,
1345
- "loss": 0.8895,
1346
  "step": 17200
1347
  },
1348
  {
1349
  "epoch": 92.02,
1350
- "learning_rate": 3.592857142857143e-06,
1351
- "loss": 0.8858,
1352
  "step": 17300
1353
  },
1354
  {
1355
  "epoch": 92.55,
1356
- "learning_rate": 3.3547619047619052e-06,
1357
- "loss": 0.8852,
1358
  "step": 17400
1359
  },
1360
  {
1361
  "epoch": 93.08,
1362
- "learning_rate": 3.1166666666666668e-06,
1363
- "loss": 0.8895,
1364
  "step": 17500
1365
  },
1366
  {
1367
  "epoch": 93.08,
1368
- "eval_loss": 0.22457998991012573,
1369
- "eval_runtime": 151.9978,
1370
- "eval_samples_per_second": 18.053,
1371
- "eval_steps_per_second": 2.257,
1372
- "eval_wer": 0.37143772014089016,
1373
  "step": 17500
1374
  },
1375
  {
1376
  "epoch": 93.61,
1377
- "learning_rate": 2.8785714285714287e-06,
1378
- "loss": 0.8835,
1379
  "step": 17600
1380
  },
1381
  {
1382
  "epoch": 94.15,
1383
- "learning_rate": 2.6404761904761907e-06,
1384
- "loss": 0.8846,
1385
  "step": 17700
1386
  },
1387
  {
1388
  "epoch": 94.68,
1389
- "learning_rate": 2.4023809523809527e-06,
1390
- "loss": 0.8826,
1391
  "step": 17800
1392
  },
1393
  {
1394
  "epoch": 95.21,
1395
- "learning_rate": 2.1642857142857146e-06,
1396
- "loss": 0.8916,
1397
  "step": 17900
1398
  },
1399
  {
1400
  "epoch": 95.74,
1401
- "learning_rate": 1.926190476190476e-06,
1402
- "loss": 0.8926,
1403
  "step": 18000
1404
  },
1405
  {
1406
  "epoch": 95.74,
1407
- "eval_loss": 0.2240794152021408,
1408
- "eval_runtime": 144.8784,
1409
- "eval_samples_per_second": 18.94,
1410
- "eval_steps_per_second": 2.368,
1411
- "eval_wer": 0.37052284890901604,
1412
  "step": 18000
1413
  },
1414
  {
1415
  "epoch": 96.28,
1416
- "learning_rate": 1.6880952380952381e-06,
1417
- "loss": 0.8885,
1418
  "step": 18100
1419
  },
1420
  {
1421
  "epoch": 96.81,
1422
- "learning_rate": 1.45e-06,
1423
- "loss": 0.8728,
1424
  "step": 18200
1425
  },
1426
  {
1427
  "epoch": 97.34,
1428
- "learning_rate": 1.211904761904762e-06,
1429
- "loss": 0.887,
1430
  "step": 18300
1431
  },
1432
  {
1433
  "epoch": 97.87,
1434
- "learning_rate": 9.73809523809524e-07,
1435
- "loss": 0.8809,
1436
  "step": 18400
1437
  },
1438
  {
1439
  "epoch": 98.4,
1440
- "learning_rate": 7.404761904761905e-07,
1441
- "loss": 0.8856,
1442
  "step": 18500
1443
  },
1444
  {
1445
  "epoch": 98.4,
1446
- "eval_loss": 0.22416575253009796,
1447
- "eval_runtime": 184.721,
1448
- "eval_samples_per_second": 14.855,
1449
- "eval_steps_per_second": 1.857,
1450
- "eval_wer": 0.3693335163075797,
1451
  "step": 18500
1452
  },
1453
  {
1454
  "epoch": 98.93,
1455
- "learning_rate": 5.023809523809524e-07,
1456
- "loss": 0.8738,
1457
  "step": 18600
1458
  },
1459
  {
1460
  "epoch": 99.47,
1461
- "learning_rate": 2.6428571428571433e-07,
1462
- "loss": 0.8831,
1463
  "step": 18700
1464
  },
1465
  {
1466
  "epoch": 100.0,
1467
- "learning_rate": 2.619047619047619e-08,
1468
- "loss": 0.877,
1469
  "step": 18800
1470
  },
1471
  {
1472
  "epoch": 100.0,
1473
  "step": 18800,
1474
- "total_flos": 1.0790071156798875e+20,
1475
- "train_loss": 1.3463122907597969,
1476
- "train_runtime": 52469.8271,
1477
- "train_samples_per_second": 11.5,
1478
- "train_steps_per_second": 0.358
1479
  }
1480
  ],
1481
  "max_steps": 18800,
1482
  "num_train_epochs": 100,
1483
- "total_flos": 1.0790071156798875e+20,
1484
  "trial_name": null,
1485
  "trial_params": null
1486
  }
 
9
  "log_history": [
10
  {
11
  "epoch": 0.53,
12
+ "learning_rate": 4.950000000000001e-06,
13
+ "loss": 13.7083,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.06,
18
+ "learning_rate": 9.950000000000001e-06,
19
+ "loss": 5.8958,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.59,
24
+ "learning_rate": 1.4950000000000001e-05,
25
+ "loss": 4.115,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.13,
30
+ "learning_rate": 1.995e-05,
31
+ "loss": 3.6068,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.66,
36
+ "learning_rate": 2.495e-05,
37
+ "loss": 3.2892,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.66,
42
+ "eval_loss": 3.241530179977417,
43
+ "eval_runtime": 149.1051,
44
+ "eval_samples_per_second": 18.403,
45
+ "eval_steps_per_second": 2.3,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 3.19,
51
+ "learning_rate": 2.995e-05,
52
+ "loss": 3.2316,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 3.72,
57
+ "learning_rate": 3.495e-05,
58
+ "loss": 3.1529,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 4.25,
63
+ "learning_rate": 3.995e-05,
64
+ "loss": 3.1279,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 4.78,
69
+ "learning_rate": 4.495e-05,
70
+ "loss": 3.0647,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 5.32,
75
+ "learning_rate": 4.995e-05,
76
+ "loss": 2.9206,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.32,
81
+ "eval_loss": 2.4381155967712402,
82
+ "eval_runtime": 158.7474,
83
+ "eval_samples_per_second": 17.285,
84
+ "eval_steps_per_second": 2.161,
85
+ "eval_wer": 1.0055832685002974,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 5.85,
90
+ "learning_rate": 5.495e-05,
91
+ "loss": 2.2804,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 6.38,
96
+ "learning_rate": 5.995000000000001e-05,
97
+ "loss": 1.8138,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 6.91,
102
+ "learning_rate": 6.49e-05,
103
+ "loss": 1.6251,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 7.45,
108
+ "learning_rate": 6.99e-05,
109
+ "loss": 1.5483,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 7.97,
114
+ "learning_rate": 7.49e-05,
115
+ "loss": 1.4909,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 7.97,
120
+ "eval_loss": 0.5427731275558472,
121
+ "eval_runtime": 150.2181,
122
+ "eval_samples_per_second": 18.267,
123
+ "eval_steps_per_second": 2.283,
124
+ "eval_wer": 0.6704956294906411,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 8.51,
129
+ "learning_rate": 7.99e-05,
130
+ "loss": 1.4577,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 9.04,
135
+ "learning_rate": 8.49e-05,
136
+ "loss": 1.4197,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 9.57,
141
+ "learning_rate": 8.985e-05,
142
+ "loss": 1.3877,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 10.11,
147
+ "learning_rate": 9.485e-05,
148
+ "loss": 1.3704,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 10.64,
153
+ "learning_rate": 9.985000000000001e-05,
154
+ "loss": 1.3395,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 10.64,
159
+ "eval_loss": 0.4207160472869873,
160
+ "eval_runtime": 153.5378,
161
+ "eval_samples_per_second": 17.872,
162
+ "eval_steps_per_second": 2.234,
163
+ "eval_wer": 0.5995148963434168,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 11.17,
168
+ "learning_rate": 9.942261904761904e-05,
169
+ "loss": 1.3349,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 11.7,
174
+ "learning_rate": 9.882738095238095e-05,
175
+ "loss": 1.3064,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.23,
180
+ "learning_rate": 9.823214285714287e-05,
181
+ "loss": 1.3132,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 12.76,
186
+ "learning_rate": 9.763690476190477e-05,
187
+ "loss": 1.2813,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 13.3,
192
+ "learning_rate": 9.704166666666668e-05,
193
+ "loss": 1.2718,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 13.3,
198
+ "eval_loss": 0.37430423498153687,
199
+ "eval_runtime": 148.1352,
200
+ "eval_samples_per_second": 18.524,
201
+ "eval_steps_per_second": 2.315,
202
+ "eval_wer": 0.5648254084481259,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 13.83,
207
+ "learning_rate": 9.644642857142857e-05,
208
+ "loss": 1.247,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 14.36,
213
+ "learning_rate": 9.585119047619047e-05,
214
+ "loss": 1.2446,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 14.89,
219
+ "learning_rate": 9.525595238095239e-05,
220
+ "loss": 1.2404,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 15.42,
225
+ "learning_rate": 9.46607142857143e-05,
226
+ "loss": 1.234,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 15.95,
231
+ "learning_rate": 9.40654761904762e-05,
232
+ "loss": 1.1798,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 15.95,
237
+ "eval_loss": 0.32250717282295227,
238
+ "eval_runtime": 150.8327,
239
+ "eval_samples_per_second": 18.192,
240
+ "eval_steps_per_second": 2.274,
241
+ "eval_wer": 0.4927005629032996,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 16.49,
246
+ "learning_rate": 9.34702380952381e-05,
247
+ "loss": 1.1849,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 17.02,
252
+ "learning_rate": 9.2875e-05,
253
+ "loss": 1.1931,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 17.55,
258
+ "learning_rate": 9.227976190476191e-05,
259
+ "loss": 1.1628,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 18.08,
264
+ "learning_rate": 9.168452380952382e-05,
265
+ "loss": 1.1743,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 18.61,
270
+ "learning_rate": 9.108928571428572e-05,
271
+ "loss": 1.1392,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 18.61,
276
+ "eval_loss": 0.3096984922885895,
277
+ "eval_runtime": 151.6787,
278
+ "eval_samples_per_second": 18.091,
279
+ "eval_steps_per_second": 2.261,
280
+ "eval_wer": 0.4626790535902247,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 19.15,
285
+ "learning_rate": 9.049404761904763e-05,
286
+ "loss": 1.1458,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 19.68,
291
+ "learning_rate": 8.989880952380953e-05,
292
+ "loss": 1.1323,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 20.21,
297
+ "learning_rate": 8.930357142857143e-05,
298
+ "loss": 1.1293,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 20.74,
303
+ "learning_rate": 8.870833333333334e-05,
304
+ "loss": 1.1179,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 21.28,
309
+ "learning_rate": 8.811309523809524e-05,
310
+ "loss": 1.1143,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 21.28,
315
+ "eval_loss": 0.29957136511802673,
316
+ "eval_runtime": 147.8067,
317
+ "eval_samples_per_second": 18.565,
318
+ "eval_steps_per_second": 2.321,
319
+ "eval_wer": 0.450459933183836,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 21.81,
324
+ "learning_rate": 8.751785714285715e-05,
325
+ "loss": 1.1121,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 22.34,
330
+ "learning_rate": 8.692261904761905e-05,
331
+ "loss": 1.1049,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 22.87,
336
+ "learning_rate": 8.632738095238096e-05,
337
+ "loss": 1.092,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 23.4,
342
+ "learning_rate": 8.573214285714286e-05,
343
+ "loss": 1.0936,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 23.93,
348
+ "learning_rate": 8.513690476190477e-05,
349
+ "loss": 1.0923,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 23.93,
354
+ "eval_loss": 0.2841183543205261,
355
+ "eval_runtime": 145.4218,
356
+ "eval_samples_per_second": 18.869,
357
+ "eval_steps_per_second": 2.359,
358
+ "eval_wer": 0.4229097066495813,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 24.47,
363
+ "learning_rate": 8.454166666666667e-05,
364
+ "loss": 1.0748,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 25.0,
369
+ "learning_rate": 8.395238095238095e-05,
370
+ "loss": 1.0786,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 25.53,
375
+ "learning_rate": 8.335714285714286e-05,
376
+ "loss": 1.0685,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 26.06,
381
+ "learning_rate": 8.276190476190476e-05,
382
+ "loss": 1.0681,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 26.59,
387
+ "learning_rate": 8.216666666666667e-05,
388
+ "loss": 1.0516,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 26.59,
393
+ "eval_loss": 0.2705024182796478,
394
+ "eval_runtime": 156.97,
395
+ "eval_samples_per_second": 17.481,
396
+ "eval_steps_per_second": 2.185,
397
+ "eval_wer": 0.4113312891858496,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 27.13,
402
+ "learning_rate": 8.157142857142857e-05,
403
+ "loss": 1.0631,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 27.66,
408
+ "learning_rate": 8.097619047619049e-05,
409
+ "loss": 1.0438,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 28.19,
414
+ "learning_rate": 8.03809523809524e-05,
415
+ "loss": 1.0437,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 28.72,
420
+ "learning_rate": 7.978571428571429e-05,
421
+ "loss": 1.0334,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 29.25,
426
+ "learning_rate": 7.919047619047619e-05,
427
+ "loss": 1.051,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 29.25,
432
+ "eval_loss": 0.26215311884880066,
433
+ "eval_runtime": 149.1886,
434
+ "eval_samples_per_second": 18.393,
435
+ "eval_steps_per_second": 2.299,
436
+ "eval_wer": 0.4078074230012356,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 29.78,
441
+ "learning_rate": 7.85952380952381e-05,
442
+ "loss": 1.0209,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 30.32,
447
+ "learning_rate": 7.800000000000001e-05,
448
+ "loss": 1.0393,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 30.85,
453
+ "learning_rate": 7.740476190476192e-05,
454
+ "loss": 1.0175,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 31.38,
459
+ "learning_rate": 7.680952380952381e-05,
460
+ "loss": 1.0208,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 31.91,
465
+ "learning_rate": 7.621428571428571e-05,
466
+ "loss": 1.021,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 31.91,
471
+ "eval_loss": 0.26111042499542236,
472
+ "eval_runtime": 144.8158,
473
+ "eval_samples_per_second": 18.948,
474
+ "eval_steps_per_second": 2.369,
475
+ "eval_wer": 0.40085121962381587,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 32.45,
480
+ "learning_rate": 7.561904761904762e-05,
481
+ "loss": 1.0091,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 32.97,
486
+ "learning_rate": 7.502380952380953e-05,
487
+ "loss": 1.0007,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 33.51,
492
+ "learning_rate": 7.442857142857144e-05,
493
+ "loss": 1.0141,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 34.04,
498
+ "learning_rate": 7.383333333333333e-05,
499
+ "loss": 1.0072,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 34.57,
504
+ "learning_rate": 7.323809523809523e-05,
505
+ "loss": 0.9886,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 34.57,
510
+ "eval_loss": 0.24984091520309448,
511
+ "eval_runtime": 146.0157,
512
+ "eval_samples_per_second": 18.792,
513
+ "eval_steps_per_second": 2.349,
514
+ "eval_wer": 0.3920644364102329,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 35.11,
519
+ "learning_rate": 7.264285714285715e-05,
520
+ "loss": 0.9955,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 35.64,
525
+ "learning_rate": 7.205357142857144e-05,
526
+ "loss": 0.9811,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 36.17,
531
+ "learning_rate": 7.145833333333334e-05,
532
+ "loss": 0.987,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 36.7,
537
+ "learning_rate": 7.086309523809524e-05,
538
+ "loss": 0.9776,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 37.23,
543
+ "learning_rate": 7.026785714285714e-05,
544
+ "loss": 0.984,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 37.23,
549
+ "eval_loss": 0.25214260816574097,
550
+ "eval_runtime": 151.9558,
551
+ "eval_samples_per_second": 18.058,
552
+ "eval_steps_per_second": 2.257,
553
+ "eval_wer": 0.38446753009015605,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 37.76,
558
+ "learning_rate": 6.967261904761905e-05,
559
+ "loss": 0.98,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 38.3,
564
+ "learning_rate": 6.907738095238096e-05,
565
+ "loss": 0.9762,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 38.83,
570
+ "learning_rate": 6.848214285714286e-05,
571
+ "loss": 0.9601,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 39.36,
576
+ "learning_rate": 6.788690476190477e-05,
577
+ "loss": 0.9633,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 39.89,
582
+ "learning_rate": 6.729166666666667e-05,
583
+ "loss": 0.9631,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 39.89,
588
+ "eval_loss": 0.24125833809375763,
589
+ "eval_runtime": 185.0035,
590
+ "eval_samples_per_second": 14.832,
591
+ "eval_steps_per_second": 1.854,
592
+ "eval_wer": 0.3790673195734749,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 40.42,
597
+ "learning_rate": 6.669642857142858e-05,
598
+ "loss": 0.9653,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 40.95,
603
+ "learning_rate": 6.610119047619048e-05,
604
+ "loss": 0.9482,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 41.49,
609
+ "learning_rate": 6.550595238095238e-05,
610
+ "loss": 0.9547,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 42.02,
615
+ "learning_rate": 6.491071428571429e-05,
616
+ "loss": 0.9635,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 42.55,
621
+ "learning_rate": 6.432142857142857e-05,
622
+ "loss": 0.9353,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 42.55,
627
+ "eval_loss": 0.23913756012916565,
628
+ "eval_runtime": 145.0257,
629
+ "eval_samples_per_second": 18.921,
630
+ "eval_steps_per_second": 2.365,
631
+ "eval_wer": 0.3611734016749806,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 43.08,
636
+ "learning_rate": 6.372619047619049e-05,
637
+ "loss": 0.9484,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 43.61,
642
+ "learning_rate": 6.313095238095238e-05,
643
+ "loss": 0.933,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 44.15,
648
+ "learning_rate": 6.253571428571429e-05,
649
+ "loss": 0.9315,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 44.68,
654
+ "learning_rate": 6.194047619047619e-05,
655
+ "loss": 0.9337,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 45.21,
660
+ "learning_rate": 6.13452380952381e-05,
661
+ "loss": 0.922,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 45.21,
666
+ "eval_loss": 0.2362910658121109,
667
+ "eval_runtime": 152.2446,
668
+ "eval_samples_per_second": 18.024,
669
+ "eval_steps_per_second": 2.253,
670
+ "eval_wer": 0.3570545970436136,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 45.74,
675
+ "learning_rate": 6.0750000000000006e-05,
676
+ "loss": 0.9274,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 46.28,
681
+ "learning_rate": 6.0154761904761904e-05,
682
+ "loss": 0.9243,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 46.81,
687
+ "learning_rate": 5.955952380952381e-05,
688
+ "loss": 0.9148,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 47.34,
693
+ "learning_rate": 5.896428571428572e-05,
694
+ "loss": 0.9309,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 47.87,
699
+ "learning_rate": 5.8369047619047624e-05,
700
+ "loss": 0.9116,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 47.87,
705
+ "eval_loss": 0.2284734547138214,
706
+ "eval_runtime": 145.6764,
707
+ "eval_samples_per_second": 18.836,
708
+ "eval_steps_per_second": 2.355,
709
+ "eval_wer": 0.366756670175278,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 48.4,
714
+ "learning_rate": 5.777380952380953e-05,
715
+ "loss": 0.908,
716
  "step": 9100
717
  },
718
  {
719
  "epoch": 48.93,
720
+ "learning_rate": 5.7178571428571426e-05,
721
+ "loss": 0.9045,
722
  "step": 9200
723
  },
724
  {
725
  "epoch": 49.47,
726
+ "learning_rate": 5.658333333333333e-05,
727
+ "loss": 0.9069,
728
  "step": 9300
729
  },
730
  {
731
  "epoch": 50.0,
732
+ "learning_rate": 5.598809523809524e-05,
733
+ "loss": 0.886,
734
  "step": 9400
735
  },
736
  {
737
  "epoch": 50.53,
738
+ "learning_rate": 5.539285714285715e-05,
739
+ "loss": 0.8951,
740
  "step": 9500
741
  },
742
  {
743
  "epoch": 50.53,
744
+ "eval_loss": 0.22562462091445923,
745
+ "eval_runtime": 146.467,
746
+ "eval_samples_per_second": 18.735,
747
+ "eval_steps_per_second": 2.342,
748
+ "eval_wer": 0.3729348771223285,
749
  "step": 9500
750
  },
751
  {
752
  "epoch": 51.06,
753
+ "learning_rate": 5.479761904761905e-05,
754
+ "loss": 0.9011,
755
  "step": 9600
756
  },
757
  {
758
  "epoch": 51.59,
759
+ "learning_rate": 5.420238095238096e-05,
760
+ "loss": 0.8858,
761
  "step": 9700
762
  },
763
  {
764
  "epoch": 52.13,
765
+ "learning_rate": 5.360714285714285e-05,
766
+ "loss": 0.8955,
767
  "step": 9800
768
  },
769
  {
770
  "epoch": 52.66,
771
+ "learning_rate": 5.3011904761904765e-05,
772
+ "loss": 0.8747,
773
  "step": 9900
774
  },
775
  {
776
  "epoch": 53.19,
777
+ "learning_rate": 5.241666666666667e-05,
778
+ "loss": 0.8865,
779
  "step": 10000
780
  },
781
  {
782
  "epoch": 53.19,
783
+ "eval_loss": 0.22283457219600677,
784
+ "eval_runtime": 147.1995,
785
+ "eval_samples_per_second": 18.641,
786
+ "eval_steps_per_second": 2.33,
787
+ "eval_wer": 0.3663447897121413,
788
  "step": 10000
789
  },
790
  {
791
  "epoch": 53.72,
792
+ "learning_rate": 5.1821428571428574e-05,
793
+ "loss": 0.8671,
794
  "step": 10100
795
  },
796
  {
797
  "epoch": 54.25,
798
+ "learning_rate": 5.1226190476190485e-05,
799
+ "loss": 0.8771,
800
  "step": 10200
801
  },
802
  {
803
  "epoch": 54.78,
804
+ "learning_rate": 5.063690476190477e-05,
805
+ "loss": 0.8777,
806
  "step": 10300
807
  },
808
  {
809
  "epoch": 55.32,
810
+ "learning_rate": 5.0041666666666666e-05,
811
+ "loss": 0.8805,
812
  "step": 10400
813
  },
814
  {
815
  "epoch": 55.85,
816
+ "learning_rate": 4.944642857142857e-05,
817
+ "loss": 0.8792,
818
  "step": 10500
819
  },
820
  {
821
  "epoch": 55.85,
822
+ "eval_loss": 0.22211238741874695,
823
+ "eval_runtime": 149.6455,
824
+ "eval_samples_per_second": 18.337,
825
+ "eval_steps_per_second": 2.292,
826
+ "eval_wer": 0.3656125577776761,
827
  "step": 10500
828
  },
829
  {
830
  "epoch": 56.38,
831
+ "learning_rate": 4.885119047619048e-05,
832
+ "loss": 0.8805,
833
  "step": 10600
834
  },
835
  {
836
  "epoch": 56.91,
837
+ "learning_rate": 4.8255952380952386e-05,
838
+ "loss": 0.8653,
839
  "step": 10700
840
  },
841
  {
842
  "epoch": 57.45,
843
+ "learning_rate": 4.7660714285714284e-05,
844
+ "loss": 0.8749,
845
  "step": 10800
846
  },
847
  {
848
  "epoch": 57.97,
849
+ "learning_rate": 4.7065476190476195e-05,
850
+ "loss": 0.8713,
851
  "step": 10900
852
  },
853
  {
854
  "epoch": 58.51,
855
+ "learning_rate": 4.64702380952381e-05,
856
+ "loss": 0.8682,
857
  "step": 11000
858
  },
859
  {
860
  "epoch": 58.51,
861
+ "eval_loss": 0.22277939319610596,
862
+ "eval_runtime": 153.8771,
863
+ "eval_samples_per_second": 17.832,
864
+ "eval_steps_per_second": 2.229,
865
+ "eval_wer": 0.3322960047595076,
866
  "step": 11000
867
  },
868
  {
869
  "epoch": 59.04,
870
+ "learning_rate": 4.5875000000000004e-05,
871
+ "loss": 0.8556,
872
  "step": 11100
873
  },
874
  {
875
  "epoch": 59.57,
876
+ "learning_rate": 4.528571428571429e-05,
877
+ "loss": 0.8508,
878
  "step": 11200
879
  },
880
  {
881
  "epoch": 60.11,
882
+ "learning_rate": 4.469047619047619e-05,
883
+ "loss": 0.8594,
884
  "step": 11300
885
  },
886
  {
887
  "epoch": 60.64,
888
+ "learning_rate": 4.4095238095238096e-05,
889
+ "loss": 0.8441,
890
  "step": 11400
891
  },
892
  {
893
  "epoch": 61.17,
894
+ "learning_rate": 4.35e-05,
895
+ "loss": 0.8492,
896
  "step": 11500
897
  },
898
  {
899
  "epoch": 61.17,
900
+ "eval_loss": 0.2166604995727539,
901
+ "eval_runtime": 147.2191,
902
+ "eval_samples_per_second": 18.639,
903
+ "eval_steps_per_second": 2.33,
904
+ "eval_wer": 0.3446066541577045,
905
  "step": 11500
906
  },
907
  {
908
  "epoch": 61.7,
909
+ "learning_rate": 4.290476190476191e-05,
910
+ "loss": 0.8353,
911
  "step": 11600
912
  },
913
  {
914
  "epoch": 62.23,
915
+ "learning_rate": 4.230952380952381e-05,
916
+ "loss": 0.8407,
917
  "step": 11700
918
  },
919
  {
920
  "epoch": 62.76,
921
+ "learning_rate": 4.1714285714285714e-05,
922
+ "loss": 0.8335,
923
  "step": 11800
924
  },
925
  {
926
  "epoch": 63.3,
927
+ "learning_rate": 4.1119047619047625e-05,
928
+ "loss": 0.8596,
929
  "step": 11900
930
  },
931
  {
932
  "epoch": 63.83,
933
+ "learning_rate": 4.052380952380952e-05,
934
+ "loss": 0.8365,
935
  "step": 12000
936
  },
937
  {
938
  "epoch": 63.83,
939
+ "eval_loss": 0.21556589007377625,
940
+ "eval_runtime": 144.6076,
941
+ "eval_samples_per_second": 18.975,
942
+ "eval_steps_per_second": 2.372,
943
+ "eval_wer": 0.33211294677589126,
944
  "step": 12000
945
  },
946
  {
947
  "epoch": 64.36,
948
+ "learning_rate": 3.9928571428571434e-05,
949
+ "loss": 0.8412,
950
  "step": 12100
951
  },
952
  {
953
  "epoch": 64.89,
954
+ "learning_rate": 3.933333333333333e-05,
955
+ "loss": 0.8301,
956
  "step": 12200
957
  },
958
  {
959
  "epoch": 65.42,
960
+ "learning_rate": 3.873809523809524e-05,
961
+ "loss": 0.8283,
962
  "step": 12300
963
  },
964
  {
965
  "epoch": 65.95,
966
+ "learning_rate": 3.814285714285715e-05,
967
+ "loss": 0.8257,
968
  "step": 12400
969
  },
970
  {
971
  "epoch": 66.49,
972
+ "learning_rate": 3.7547619047619045e-05,
973
+ "loss": 0.8298,
974
  "step": 12500
975
  },
976
  {
977
  "epoch": 66.49,
978
+ "eval_loss": 0.21417230367660522,
979
+ "eval_runtime": 151.8253,
980
+ "eval_samples_per_second": 18.073,
981
+ "eval_steps_per_second": 2.259,
982
+ "eval_wer": 0.3400302045672967,
983
  "step": 12500
984
  },
985
  {
986
  "epoch": 67.02,
987
+ "learning_rate": 3.6952380952380956e-05,
988
+ "loss": 0.834,
989
  "step": 12600
990
  },
991
  {
992
  "epoch": 67.55,
993
+ "learning_rate": 3.6357142857142854e-05,
994
+ "loss": 0.8155,
995
  "step": 12700
996
  },
997
  {
998
  "epoch": 68.08,
999
+ "learning_rate": 3.5761904761904765e-05,
1000
+ "loss": 0.8157,
1001
  "step": 12800
1002
  },
1003
  {
1004
  "epoch": 68.61,
1005
+ "learning_rate": 3.516666666666667e-05,
1006
+ "loss": 0.8027,
1007
  "step": 12900
1008
  },
1009
  {
1010
  "epoch": 69.15,
1011
+ "learning_rate": 3.4571428571428574e-05,
1012
+ "loss": 0.808,
1013
  "step": 13000
1014
  },
1015
  {
1016
  "epoch": 69.15,
1017
+ "eval_loss": 0.20793649554252625,
1018
+ "eval_runtime": 147.4919,
1019
+ "eval_samples_per_second": 18.604,
1020
+ "eval_steps_per_second": 2.326,
1021
+ "eval_wer": 0.31476820282824586,
1022
  "step": 13000
1023
  },
1024
  {
1025
  "epoch": 69.68,
1026
+ "learning_rate": 3.397619047619048e-05,
1027
+ "loss": 0.807,
1028
  "step": 13100
1029
  },
1030
  {
1031
  "epoch": 70.21,
1032
+ "learning_rate": 3.338095238095238e-05,
1033
+ "loss": 0.8164,
1034
  "step": 13200
1035
  },
1036
  {
1037
  "epoch": 70.74,
1038
+ "learning_rate": 3.278571428571429e-05,
1039
+ "loss": 0.7979,
1040
  "step": 13300
1041
  },
1042
  {
1043
  "epoch": 71.28,
1044
+ "learning_rate": 3.219047619047619e-05,
1045
+ "loss": 0.815,
1046
  "step": 13400
1047
  },
1048
  {
1049
  "epoch": 71.81,
1050
+ "learning_rate": 3.15952380952381e-05,
1051
+ "loss": 0.7999,
1052
  "step": 13500
1053
  },
1054
  {
1055
  "epoch": 71.81,
1056
+ "eval_loss": 0.21165262162685394,
1057
+ "eval_runtime": 145.5259,
1058
+ "eval_samples_per_second": 18.856,
1059
+ "eval_steps_per_second": 2.357,
1060
+ "eval_wer": 0.32254816713193907,
1061
  "step": 13500
1062
  },
1063
  {
1064
  "epoch": 72.34,
1065
+ "learning_rate": 3.1e-05,
1066
+ "loss": 0.8143,
1067
  "step": 13600
1068
  },
1069
  {
1070
  "epoch": 72.87,
1071
+ "learning_rate": 3.040476190476191e-05,
1072
+ "loss": 0.7952,
1073
  "step": 13700
1074
  },
1075
  {
1076
  "epoch": 73.4,
1077
+ "learning_rate": 2.980952380952381e-05,
1078
+ "loss": 0.8075,
1079
  "step": 13800
1080
  },
1081
  {
1082
  "epoch": 73.93,
1083
+ "learning_rate": 2.9214285714285715e-05,
1084
+ "loss": 0.8021,
1085
  "step": 13900
1086
  },
1087
  {
1088
  "epoch": 74.47,
1089
+ "learning_rate": 2.8619047619047623e-05,
1090
+ "loss": 0.7871,
1091
  "step": 14000
1092
  },
1093
  {
1094
  "epoch": 74.47,
1095
+ "eval_loss": 0.2087966501712799,
1096
+ "eval_runtime": 154.8982,
1097
+ "eval_samples_per_second": 17.715,
1098
+ "eval_steps_per_second": 2.214,
1099
+ "eval_wer": 0.31742254359068234,
1100
  "step": 14000
1101
  },
1102
  {
1103
  "epoch": 75.0,
1104
+ "learning_rate": 2.8023809523809524e-05,
1105
+ "loss": 0.7898,
1106
  "step": 14100
1107
  },
1108
  {
1109
  "epoch": 75.53,
1110
+ "learning_rate": 2.742857142857143e-05,
1111
+ "loss": 0.7972,
1112
  "step": 14200
1113
  },
1114
  {
1115
  "epoch": 76.06,
1116
+ "learning_rate": 2.6833333333333333e-05,
1117
+ "loss": 0.801,
1118
  "step": 14300
1119
  },
1120
  {
1121
  "epoch": 76.59,
1122
+ "learning_rate": 2.623809523809524e-05,
1123
+ "loss": 0.786,
1124
  "step": 14400
1125
  },
1126
  {
1127
  "epoch": 77.13,
1128
+ "learning_rate": 2.5642857142857145e-05,
1129
+ "loss": 0.7858,
1130
  "step": 14500
1131
  },
1132
  {
1133
  "epoch": 77.13,
1134
+ "eval_loss": 0.2059505730867386,
1135
+ "eval_runtime": 147.0151,
1136
+ "eval_samples_per_second": 18.665,
1137
+ "eval_steps_per_second": 2.333,
1138
+ "eval_wer": 0.3008100315775022,
1139
  "step": 14500
1140
  },
1141
  {
1142
  "epoch": 77.66,
1143
+ "learning_rate": 2.5047619047619046e-05,
1144
+ "loss": 0.7753,
1145
  "step": 14600
1146
  },
1147
  {
1148
  "epoch": 78.19,
1149
+ "learning_rate": 2.4452380952380954e-05,
1150
+ "loss": 0.7794,
1151
  "step": 14700
1152
  },
1153
  {
1154
  "epoch": 78.72,
1155
+ "learning_rate": 2.385714285714286e-05,
1156
+ "loss": 0.7775,
1157
  "step": 14800
1158
  },
1159
  {
1160
  "epoch": 79.25,
1161
+ "learning_rate": 2.3261904761904763e-05,
1162
+ "loss": 0.7753,
1163
  "step": 14900
1164
  },
1165
  {
1166
  "epoch": 79.78,
1167
+ "learning_rate": 2.2666666666666668e-05,
1168
+ "loss": 0.7764,
1169
  "step": 15000
1170
  },
1171
  {
1172
  "epoch": 79.78,
1173
+ "eval_loss": 0.2128456085920334,
1174
+ "eval_runtime": 150.2974,
1175
+ "eval_samples_per_second": 18.257,
1176
+ "eval_steps_per_second": 2.282,
1177
+ "eval_wer": 0.3145851448446295,
1178
  "step": 15000
1179
  },
1180
  {
1181
  "epoch": 80.32,
1182
+ "learning_rate": 2.2071428571428572e-05,
1183
+ "loss": 0.7847,
1184
  "step": 15100
1185
  },
1186
  {
1187
  "epoch": 80.85,
1188
+ "learning_rate": 2.1476190476190477e-05,
1189
+ "loss": 0.774,
1190
  "step": 15200
1191
  },
1192
  {
1193
  "epoch": 81.38,
1194
+ "learning_rate": 2.0886904761904763e-05,
1195
+ "loss": 0.7739,
1196
  "step": 15300
1197
  },
1198
  {
1199
  "epoch": 81.91,
1200
+ "learning_rate": 2.0291666666666667e-05,
1201
+ "loss": 0.7579,
1202
  "step": 15400
1203
  },
1204
  {
1205
  "epoch": 82.45,
1206
+ "learning_rate": 1.9696428571428572e-05,
1207
+ "loss": 0.7684,
1208
  "step": 15500
1209
  },
1210
  {
1211
  "epoch": 82.45,
1212
+ "eval_loss": 0.20856936275959015,
1213
+ "eval_runtime": 149.2381,
1214
+ "eval_samples_per_second": 18.387,
1215
+ "eval_steps_per_second": 2.298,
1216
+ "eval_wer": 0.31005445975012585,
1217
  "step": 15500
1218
  },
1219
  {
1220
  "epoch": 82.97,
1221
+ "learning_rate": 1.9101190476190476e-05,
1222
+ "loss": 0.7584,
1223
  "step": 15600
1224
  },
1225
  {
1226
  "epoch": 83.51,
1227
+ "learning_rate": 1.850595238095238e-05,
1228
+ "loss": 0.7653,
1229
  "step": 15700
1230
  },
1231
  {
1232
  "epoch": 84.04,
1233
+ "learning_rate": 1.7910714285714285e-05,
1234
+ "loss": 0.7628,
1235
  "step": 15800
1236
  },
1237
  {
1238
  "epoch": 84.57,
1239
+ "learning_rate": 1.731547619047619e-05,
1240
+ "loss": 0.76,
1241
  "step": 15900
1242
  },
1243
  {
1244
  "epoch": 85.11,
1245
+ "learning_rate": 1.6720238095238098e-05,
1246
+ "loss": 0.7717,
1247
  "step": 16000
1248
  },
1249
  {
1250
  "epoch": 85.11,
1251
+ "eval_loss": 0.20475880801677704,
1252
+ "eval_runtime": 151.8607,
1253
+ "eval_samples_per_second": 18.069,
1254
+ "eval_steps_per_second": 2.259,
1255
+ "eval_wer": 0.3068509450368404,
1256
  "step": 16000
1257
  },
1258
  {
1259
  "epoch": 85.64,
1260
+ "learning_rate": 1.6125000000000002e-05,
1261
+ "loss": 0.7459,
1262
  "step": 16100
1263
  },
1264
  {
1265
  "epoch": 86.17,
1266
+ "learning_rate": 1.5529761904761907e-05,
1267
+ "loss": 0.7561,
1268
  "step": 16200
1269
  },
1270
  {
1271
  "epoch": 86.7,
1272
+ "learning_rate": 1.493452380952381e-05,
1273
+ "loss": 0.7457,
1274
  "step": 16300
1275
  },
1276
  {
1277
  "epoch": 87.23,
1278
+ "learning_rate": 1.4339285714285716e-05,
1279
+ "loss": 0.7599,
1280
  "step": 16400
1281
  },
1282
  {
1283
  "epoch": 87.76,
1284
+ "learning_rate": 1.374404761904762e-05,
1285
+ "loss": 0.7435,
1286
  "step": 16500
1287
  },
1288
  {
1289
  "epoch": 87.76,
1290
+ "eval_loss": 0.20274706184864044,
1291
+ "eval_runtime": 146.6164,
1292
+ "eval_samples_per_second": 18.716,
1293
+ "eval_steps_per_second": 2.339,
1294
+ "eval_wer": 0.3054780101597181,
1295
  "step": 16500
1296
  },
1297
  {
1298
  "epoch": 88.3,
1299
+ "learning_rate": 1.3148809523809525e-05,
1300
+ "loss": 0.7519,
1301
  "step": 16600
1302
  },
1303
  {
1304
  "epoch": 88.83,
1305
+ "learning_rate": 1.255357142857143e-05,
1306
+ "loss": 0.7483,
1307
  "step": 16700
1308
  },
1309
  {
1310
  "epoch": 89.36,
1311
+ "learning_rate": 1.1958333333333334e-05,
1312
+ "loss": 0.7483,
1313
  "step": 16800
1314
  },
1315
  {
1316
  "epoch": 89.89,
1317
+ "learning_rate": 1.1363095238095238e-05,
1318
+ "loss": 0.7353,
1319
  "step": 16900
1320
  },
1321
  {
1322
  "epoch": 90.42,
1323
+ "learning_rate": 1.0767857142857143e-05,
1324
+ "loss": 0.7378,
1325
  "step": 17000
1326
  },
1327
  {
1328
  "epoch": 90.42,
1329
+ "eval_loss": 0.20591045916080475,
1330
+ "eval_runtime": 153.244,
1331
+ "eval_samples_per_second": 17.906,
1332
+ "eval_steps_per_second": 2.238,
1333
+ "eval_wer": 0.29925403871676354,
1334
  "step": 17000
1335
  },
1336
  {
1337
  "epoch": 90.95,
1338
+ "learning_rate": 1.0172619047619047e-05,
1339
+ "loss": 0.7383,
1340
  "step": 17100
1341
  },
1342
  {
1343
  "epoch": 91.49,
1344
+ "learning_rate": 9.577380952380953e-06,
1345
+ "loss": 0.7466,
1346
  "step": 17200
1347
  },
1348
  {
1349
  "epoch": 92.02,
1350
+ "learning_rate": 8.982142857142856e-06,
1351
+ "loss": 0.7377,
1352
  "step": 17300
1353
  },
1354
  {
1355
  "epoch": 92.55,
1356
+ "learning_rate": 8.386904761904762e-06,
1357
+ "loss": 0.7369,
1358
  "step": 17400
1359
  },
1360
  {
1361
  "epoch": 93.08,
1362
+ "learning_rate": 7.791666666666667e-06,
1363
+ "loss": 0.7406,
1364
  "step": 17500
1365
  },
1366
  {
1367
  "epoch": 93.08,
1368
+ "eval_loss": 0.2040216028690338,
1369
+ "eval_runtime": 148.029,
1370
+ "eval_samples_per_second": 18.537,
1371
+ "eval_steps_per_second": 2.317,
1372
+ "eval_wer": 0.2966454624502311,
1373
  "step": 17500
1374
  },
1375
  {
1376
  "epoch": 93.61,
1377
+ "learning_rate": 7.2023809523809524e-06,
1378
+ "loss": 0.7348,
1379
  "step": 17600
1380
  },
1381
  {
1382
  "epoch": 94.15,
1383
+ "learning_rate": 6.613095238095239e-06,
1384
+ "loss": 0.7356,
1385
  "step": 17700
1386
  },
1387
  {
1388
  "epoch": 94.68,
1389
+ "learning_rate": 6.017857142857143e-06,
1390
+ "loss": 0.7305,
1391
  "step": 17800
1392
  },
1393
  {
1394
  "epoch": 95.21,
1395
+ "learning_rate": 5.422619047619048e-06,
1396
+ "loss": 0.741,
1397
  "step": 17900
1398
  },
1399
  {
1400
  "epoch": 95.74,
1401
+ "learning_rate": 4.827380952380952e-06,
1402
+ "loss": 0.7361,
1403
  "step": 18000
1404
  },
1405
  {
1406
  "epoch": 95.74,
1407
+ "eval_loss": 0.20563913881778717,
1408
+ "eval_runtime": 145.4837,
1409
+ "eval_samples_per_second": 18.861,
1410
+ "eval_steps_per_second": 2.358,
1411
+ "eval_wer": 0.30003203514713284,
1412
  "step": 18000
1413
  },
1414
  {
1415
  "epoch": 96.28,
1416
+ "learning_rate": 4.232142857142858e-06,
1417
+ "loss": 0.7359,
1418
  "step": 18100
1419
  },
1420
  {
1421
  "epoch": 96.81,
1422
+ "learning_rate": 3.636904761904762e-06,
1423
+ "loss": 0.7246,
1424
  "step": 18200
1425
  },
1426
  {
1427
  "epoch": 97.34,
1428
+ "learning_rate": 3.041666666666667e-06,
1429
+ "loss": 0.7353,
1430
  "step": 18300
1431
  },
1432
  {
1433
  "epoch": 97.87,
1434
+ "learning_rate": 2.4464285714285715e-06,
1435
+ "loss": 0.7305,
1436
  "step": 18400
1437
  },
1438
  {
1439
  "epoch": 98.4,
1440
+ "learning_rate": 1.8511904761904762e-06,
1441
+ "loss": 0.7379,
1442
  "step": 18500
1443
  },
1444
  {
1445
  "epoch": 98.4,
1446
+ "eval_loss": 0.20308499038219452,
1447
+ "eval_runtime": 146.2002,
1448
+ "eval_samples_per_second": 18.769,
1449
+ "eval_steps_per_second": 2.346,
1450
+ "eval_wer": 0.29756075236831264,
1451
  "step": 18500
1452
  },
1453
  {
1454
  "epoch": 98.93,
1455
+ "learning_rate": 1.2559523809523812e-06,
1456
+ "loss": 0.724,
1457
  "step": 18600
1458
  },
1459
  {
1460
  "epoch": 99.47,
1461
+ "learning_rate": 6.607142857142858e-07,
1462
+ "loss": 0.7339,
1463
  "step": 18700
1464
  },
1465
  {
1466
  "epoch": 100.0,
1467
+ "learning_rate": 6.547619047619047e-08,
1468
+ "loss": 0.7277,
1469
  "step": 18800
1470
  },
1471
  {
1472
  "epoch": 100.0,
1473
  "step": 18800,
1474
+ "total_flos": 1.0789860816574084e+20,
1475
+ "train_loss": 1.1418190615227881,
1476
+ "train_runtime": 52190.9896,
1477
+ "train_samples_per_second": 11.561,
1478
+ "train_steps_per_second": 0.36
1479
  }
1480
  ],
1481
  "max_steps": 18800,
1482
  "num_train_epochs": 100,
1483
+ "total_flos": 1.0789860816574084e+20,
1484
  "trial_name": null,
1485
  "trial_params": null
1486
  }