DrishtiSharma committed on
Commit
0536174
1 Parent(s): fcedf11

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +4 -4
  4. trainer_state.json +211 -211
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 50.0,
3
- "eval_loss": 0.7062973976135254,
4
- "eval_runtime": 74.685,
5
  "eval_samples": 1700,
6
- "eval_samples_per_second": 22.762,
7
- "eval_steps_per_second": 1.433,
8
- "eval_wer": 0.5557036754903828,
9
- "train_loss": 0.7598760600297347,
10
- "train_runtime": 13000.5685,
11
  "train_samples": 3676,
12
- "train_samples_per_second": 14.138,
13
- "train_steps_per_second": 0.442
14
  }
 
1
  {
2
  "epoch": 50.0,
3
+ "eval_loss": 0.5196508765220642,
4
+ "eval_runtime": 80.8909,
5
  "eval_samples": 1700,
6
+ "eval_samples_per_second": 21.016,
7
+ "eval_steps_per_second": 1.323,
8
+ "eval_wer": 0.4689265536723164,
9
+ "train_loss": 1.123646092788033,
10
+ "train_runtime": 13730.8326,
11
  "train_samples": 3676,
12
+ "train_samples_per_second": 13.386,
13
+ "train_steps_per_second": 0.419
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 50.0,
3
- "eval_loss": 0.7062973976135254,
4
- "eval_runtime": 74.685,
5
  "eval_samples": 1700,
6
- "eval_samples_per_second": 22.762,
7
- "eval_steps_per_second": 1.433,
8
- "eval_wer": 0.5557036754903828
9
  }
 
1
  {
2
  "epoch": 50.0,
3
+ "eval_loss": 0.5196508765220642,
4
+ "eval_runtime": 80.8909,
5
  "eval_samples": 1700,
6
+ "eval_samples_per_second": 21.016,
7
+ "eval_steps_per_second": 1.323,
8
+ "eval_wer": 0.4689265536723164
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 50.0,
3
- "train_loss": 0.7598760600297347,
4
- "train_runtime": 13000.5685,
5
  "train_samples": 3676,
6
- "train_samples_per_second": 14.138,
7
- "train_steps_per_second": 0.442
8
  }
 
1
  {
2
  "epoch": 50.0,
3
+ "train_loss": 1.123646092788033,
4
+ "train_runtime": 13730.8326,
5
  "train_samples": 3676,
6
+ "train_samples_per_second": 13.386,
7
+ "train_steps_per_second": 0.419
8
  }
trainer_state.json CHANGED
@@ -9,525 +9,525 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.87,
12
- "learning_rate": 5.7166666666666664e-06,
13
- "loss": 13.1691,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.74,
18
- "learning_rate": 1.155e-05,
19
- "loss": 5.7916,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.61,
24
- "learning_rate": 1.738333333333333e-05,
25
- "loss": 3.9554,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.61,
30
- "eval_loss": 3.8166096210479736,
31
- "eval_runtime": 76.9069,
32
- "eval_samples_per_second": 22.105,
33
- "eval_steps_per_second": 1.391,
34
  "eval_wer": 1.0,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 3.48,
39
- "learning_rate": 2.3216666666666664e-05,
40
- "loss": 3.4177,
41
  "step": 400
42
  },
43
  {
44
  "epoch": 4.35,
45
- "learning_rate": 2.9049999999999995e-05,
46
- "loss": 3.1088,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 5.22,
51
- "learning_rate": 3.488333333333333e-05,
52
- "loss": 2.9853,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 5.22,
57
- "eval_loss": 2.979321002960205,
58
- "eval_runtime": 76.5995,
59
- "eval_samples_per_second": 22.193,
60
- "eval_steps_per_second": 1.397,
61
  "eval_wer": 1.0,
62
  "step": 600
63
  },
64
  {
65
  "epoch": 6.09,
66
- "learning_rate": 4.071666666666666e-05,
67
- "loss": 2.6633,
68
  "step": 700
69
  },
70
  {
71
  "epoch": 6.96,
72
- "learning_rate": 4.655e-05,
73
- "loss": 1.3761,
74
  "step": 800
75
  },
76
  {
77
  "epoch": 7.83,
78
- "learning_rate": 5.2383333333333324e-05,
79
- "loss": 0.6939,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 7.83,
84
- "eval_loss": 0.972253143787384,
85
- "eval_runtime": 77.2431,
86
- "eval_samples_per_second": 22.008,
87
- "eval_steps_per_second": 1.385,
88
- "eval_wer": 0.9149368374277915,
89
  "step": 900
90
  },
91
  {
92
  "epoch": 8.7,
93
- "learning_rate": 5.821666666666666e-05,
94
- "loss": 0.5141,
95
  "step": 1000
96
  },
97
  {
98
  "epoch": 9.57,
99
- "learning_rate": 6.405e-05,
100
- "loss": 0.4189,
101
  "step": 1100
102
  },
103
  {
104
  "epoch": 10.43,
105
- "learning_rate": 6.988333333333333e-05,
106
- "loss": 0.352,
107
  "step": 1200
108
  },
109
  {
110
  "epoch": 10.43,
111
- "eval_loss": 0.6556435823440552,
112
- "eval_runtime": 76.3866,
113
- "eval_samples_per_second": 22.255,
114
- "eval_steps_per_second": 1.401,
115
- "eval_wer": 0.7296387989589285,
116
  "step": 1200
117
  },
118
  {
119
  "epoch": 11.3,
120
- "learning_rate": 6.849230769230769e-05,
121
- "loss": 0.307,
122
  "step": 1300
123
  },
124
  {
125
  "epoch": 12.17,
126
- "learning_rate": 6.695384615384615e-05,
127
- "loss": 0.2658,
128
  "step": 1400
129
  },
130
  {
131
  "epoch": 13.04,
132
- "learning_rate": 6.541538461538461e-05,
133
- "loss": 0.2369,
134
  "step": 1500
135
  },
136
  {
137
  "epoch": 13.04,
138
- "eval_loss": 0.6314664483070374,
139
- "eval_runtime": 76.2318,
140
- "eval_samples_per_second": 22.3,
141
- "eval_steps_per_second": 1.404,
142
- "eval_wer": 0.6927569351869485,
143
  "step": 1500
144
  },
145
  {
146
  "epoch": 13.91,
147
- "learning_rate": 6.387692307692307e-05,
148
- "loss": 0.2084,
149
  "step": 1600
150
  },
151
  {
152
  "epoch": 14.78,
153
- "learning_rate": 6.233846153846153e-05,
154
- "loss": 0.1904,
155
  "step": 1700
156
  },
157
  {
158
  "epoch": 15.65,
159
- "learning_rate": 6.0799999999999994e-05,
160
- "loss": 0.1757,
161
  "step": 1800
162
  },
163
  {
164
  "epoch": 15.65,
165
- "eval_loss": 0.6387060284614563,
166
- "eval_runtime": 76.1573,
167
- "eval_samples_per_second": 22.322,
168
- "eval_steps_per_second": 1.405,
169
- "eval_wer": 0.6723798641528598,
170
  "step": 1800
171
  },
172
  {
173
  "epoch": 16.52,
174
- "learning_rate": 5.9261538461538453e-05,
175
- "loss": 0.1693,
176
  "step": 1900
177
  },
178
  {
179
  "epoch": 17.39,
180
- "learning_rate": 5.772307692307692e-05,
181
- "loss": 0.1572,
182
  "step": 2000
183
  },
184
  {
185
  "epoch": 18.26,
186
- "learning_rate": 5.618461538461538e-05,
187
- "loss": 0.1517,
188
  "step": 2100
189
  },
190
  {
191
  "epoch": 18.26,
192
- "eval_loss": 0.6607237458229065,
193
- "eval_runtime": 77.027,
194
- "eval_samples_per_second": 22.07,
195
- "eval_steps_per_second": 1.389,
196
- "eval_wer": 0.6265473243191773,
197
  "step": 2100
198
  },
199
  {
200
  "epoch": 19.13,
201
- "learning_rate": 5.464615384615384e-05,
202
- "loss": 0.1403,
203
  "step": 2200
204
  },
205
  {
206
  "epoch": 20.0,
207
- "learning_rate": 5.3107692307692305e-05,
208
- "loss": 0.1382,
209
  "step": 2300
210
  },
211
  {
212
  "epoch": 20.87,
213
- "learning_rate": 5.1569230769230765e-05,
214
- "loss": 0.1229,
215
  "step": 2400
216
  },
217
  {
218
  "epoch": 20.87,
219
- "eval_loss": 0.6519985198974609,
220
- "eval_runtime": 76.1295,
221
- "eval_samples_per_second": 22.33,
222
- "eval_steps_per_second": 1.405,
223
- "eval_wer": 0.6182949279502317,
224
  "step": 2400
225
  },
226
  {
227
  "epoch": 21.74,
228
- "learning_rate": 5.0030769230769225e-05,
229
- "loss": 0.129,
230
  "step": 2500
231
  },
232
  {
233
  "epoch": 22.61,
234
- "learning_rate": 4.849230769230769e-05,
235
- "loss": 0.1172,
236
  "step": 2600
237
  },
238
  {
239
  "epoch": 23.48,
240
- "learning_rate": 4.695384615384615e-05,
241
- "loss": 0.1201,
242
  "step": 2700
243
  },
244
  {
245
  "epoch": 23.48,
246
- "eval_loss": 0.67499840259552,
247
- "eval_runtime": 76.4187,
248
- "eval_samples_per_second": 22.246,
249
- "eval_steps_per_second": 1.4,
250
- "eval_wer": 0.6115660509109376,
251
  "step": 2700
252
  },
253
  {
254
  "epoch": 24.35,
255
- "learning_rate": 4.541538461538461e-05,
256
- "loss": 0.1112,
257
  "step": 2800
258
  },
259
  {
260
  "epoch": 25.22,
261
- "learning_rate": 4.387692307692307e-05,
262
- "loss": 0.1131,
263
  "step": 2900
264
  },
265
  {
266
  "epoch": 26.09,
267
- "learning_rate": 4.2338461538461536e-05,
268
- "loss": 0.1076,
269
  "step": 3000
270
  },
271
  {
272
  "epoch": 26.09,
273
- "eval_loss": 0.6698060035705566,
274
- "eval_runtime": 75.169,
275
- "eval_samples_per_second": 22.616,
276
- "eval_steps_per_second": 1.423,
277
- "eval_wer": 0.6006474957151019,
278
  "step": 3000
279
  },
280
  {
281
  "epoch": 26.96,
282
- "learning_rate": 4.0799999999999996e-05,
283
- "loss": 0.104,
284
  "step": 3100
285
  },
286
  {
287
  "epoch": 27.83,
288
- "learning_rate": 3.9261538461538455e-05,
289
- "loss": 0.1014,
290
  "step": 3200
291
  },
292
  {
293
  "epoch": 28.7,
294
- "learning_rate": 3.772307692307692e-05,
295
- "loss": 0.1006,
296
  "step": 3300
297
  },
298
  {
299
  "epoch": 28.7,
300
- "eval_loss": 0.680385947227478,
301
- "eval_runtime": 75.8528,
302
- "eval_samples_per_second": 22.412,
303
- "eval_steps_per_second": 1.411,
304
- "eval_wer": 0.5871897416365137,
305
  "step": 3300
306
  },
307
  {
308
  "epoch": 29.57,
309
- "learning_rate": 3.618461538461538e-05,
310
- "loss": 0.1013,
311
  "step": 3400
312
  },
313
  {
314
  "epoch": 30.43,
315
- "learning_rate": 3.464615384615384e-05,
316
- "loss": 0.0972,
317
  "step": 3500
318
  },
319
  {
320
  "epoch": 31.3,
321
- "learning_rate": 3.310769230769231e-05,
322
- "loss": 0.0952,
323
  "step": 3600
324
  },
325
  {
326
  "epoch": 31.3,
327
- "eval_loss": 0.7009897828102112,
328
- "eval_runtime": 74.9836,
329
- "eval_samples_per_second": 22.672,
330
- "eval_steps_per_second": 1.427,
331
- "eval_wer": 0.600266615882689,
332
  "step": 3600
333
  },
334
  {
335
  "epoch": 32.17,
336
- "learning_rate": 3.156923076923077e-05,
337
- "loss": 0.091,
338
  "step": 3700
339
  },
340
  {
341
  "epoch": 33.04,
342
- "learning_rate": 3.0030769230769226e-05,
343
- "loss": 0.0924,
344
  "step": 3800
345
  },
346
  {
347
  "epoch": 33.91,
348
- "learning_rate": 2.849230769230769e-05,
349
- "loss": 0.0894,
350
  "step": 3900
351
  },
352
  {
353
  "epoch": 33.91,
354
- "eval_loss": 0.7068695425987244,
355
- "eval_runtime": 74.6323,
356
- "eval_samples_per_second": 22.778,
357
- "eval_steps_per_second": 1.434,
358
- "eval_wer": 0.584015743033073,
359
  "step": 3900
360
  },
361
  {
362
  "epoch": 34.78,
363
- "learning_rate": 2.695384615384615e-05,
364
- "loss": 0.0869,
365
  "step": 4000
366
  },
367
  {
368
  "epoch": 35.65,
369
- "learning_rate": 2.5415384615384612e-05,
370
- "loss": 0.0848,
371
  "step": 4100
372
  },
373
  {
374
  "epoch": 36.52,
375
- "learning_rate": 2.3876923076923075e-05,
376
- "loss": 0.0873,
377
  "step": 4200
378
  },
379
  {
380
  "epoch": 36.52,
381
- "eval_loss": 0.6764819622039795,
382
- "eval_runtime": 75.5212,
383
- "eval_samples_per_second": 22.51,
384
- "eval_steps_per_second": 1.417,
385
- "eval_wer": 0.5781121056306735,
386
  "step": 4200
387
  },
388
  {
389
  "epoch": 37.39,
390
- "learning_rate": 2.2338461538461534e-05,
391
- "loss": 0.0845,
392
  "step": 4300
393
  },
394
  {
395
  "epoch": 38.26,
396
- "learning_rate": 2.0799999999999997e-05,
397
- "loss": 0.0813,
398
  "step": 4400
399
  },
400
  {
401
  "epoch": 39.13,
402
- "learning_rate": 1.9261538461538457e-05,
403
- "loss": 0.0798,
404
  "step": 4500
405
  },
406
  {
407
  "epoch": 39.13,
408
- "eval_loss": 0.6821776032447815,
409
- "eval_runtime": 76.5806,
410
- "eval_samples_per_second": 22.199,
411
- "eval_steps_per_second": 1.397,
412
- "eval_wer": 0.5617342728369199,
413
  "step": 4500
414
  },
415
  {
416
  "epoch": 40.0,
417
- "learning_rate": 1.772307692307692e-05,
418
- "loss": 0.0783,
419
  "step": 4600
420
  },
421
  {
422
  "epoch": 40.87,
423
- "learning_rate": 1.6199999999999997e-05,
424
- "loss": 0.0753,
425
  "step": 4700
426
  },
427
  {
428
  "epoch": 41.74,
429
- "learning_rate": 1.466153846153846e-05,
430
- "loss": 0.0767,
431
  "step": 4800
432
  },
433
  {
434
  "epoch": 41.74,
435
- "eval_loss": 0.7003459334373474,
436
- "eval_runtime": 74.7077,
437
- "eval_samples_per_second": 22.755,
438
- "eval_steps_per_second": 1.432,
439
- "eval_wer": 0.5637656319431219,
440
  "step": 4800
441
  },
442
  {
443
  "epoch": 42.61,
444
- "learning_rate": 1.3123076923076922e-05,
445
- "loss": 0.0739,
446
  "step": 4900
447
  },
448
  {
449
  "epoch": 43.48,
450
- "learning_rate": 1.1584615384615385e-05,
451
- "loss": 0.0766,
452
  "step": 5000
453
  },
454
  {
455
  "epoch": 44.35,
456
- "learning_rate": 1.0046153846153846e-05,
457
- "loss": 0.0717,
458
  "step": 5100
459
  },
460
  {
461
  "epoch": 44.35,
462
- "eval_loss": 0.6969868540763855,
463
- "eval_runtime": 74.6644,
464
- "eval_samples_per_second": 22.769,
465
- "eval_steps_per_second": 1.433,
466
- "eval_wer": 0.5590681140100299,
467
  "step": 5100
468
  },
469
  {
470
  "epoch": 45.22,
471
- "learning_rate": 8.507692307692307e-06,
472
- "loss": 0.0732,
473
  "step": 5200
474
  },
475
  {
476
  "epoch": 46.09,
477
- "learning_rate": 6.9692307692307684e-06,
478
- "loss": 0.0713,
479
  "step": 5300
480
  },
481
  {
482
  "epoch": 46.96,
483
- "learning_rate": 5.4307692307692306e-06,
484
- "loss": 0.0687,
485
  "step": 5400
486
  },
487
  {
488
  "epoch": 46.96,
489
- "eval_loss": 0.7072671055793762,
490
- "eval_runtime": 74.5796,
491
- "eval_samples_per_second": 22.794,
492
- "eval_steps_per_second": 1.435,
493
- "eval_wer": 0.560337713451406,
494
  "step": 5400
495
  },
496
  {
497
  "epoch": 47.83,
498
- "learning_rate": 3.892307692307692e-06,
499
- "loss": 0.0674,
500
  "step": 5500
501
  },
502
  {
503
  "epoch": 48.7,
504
- "learning_rate": 2.3538461538461536e-06,
505
- "loss": 0.0702,
506
  "step": 5600
507
  },
508
  {
509
  "epoch": 49.57,
510
- "learning_rate": 8.153846153846153e-07,
511
- "loss": 0.0669,
512
  "step": 5700
513
  },
514
  {
515
  "epoch": 49.57,
516
- "eval_loss": 0.7059715390205383,
517
- "eval_runtime": 74.624,
518
- "eval_samples_per_second": 22.781,
519
- "eval_steps_per_second": 1.434,
520
- "eval_wer": 0.5558941154065892,
521
  "step": 5700
522
  },
523
  {
524
  "epoch": 50.0,
525
  "step": 5750,
526
  "total_flos": 2.9609940258263142e+19,
527
- "train_loss": 0.7598760600297347,
528
- "train_runtime": 13000.5685,
529
- "train_samples_per_second": 14.138,
530
- "train_steps_per_second": 0.442
531
  }
532
  ],
533
  "max_steps": 5750,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.87,
12
+ "learning_rate": 3.3949999999999997e-06,
13
+ "loss": 13.6823,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.74,
18
+ "learning_rate": 6.895e-06,
19
+ "loss": 7.5854,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.61,
24
+ "learning_rate": 1.0394999999999998e-05,
25
+ "loss": 4.3711,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.61,
30
+ "eval_loss": 4.312221050262451,
31
+ "eval_runtime": 80.91,
32
+ "eval_samples_per_second": 21.011,
33
+ "eval_steps_per_second": 1.322,
34
  "eval_wer": 1.0,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 3.48,
39
+ "learning_rate": 1.3895e-05,
40
+ "loss": 3.8129,
41
  "step": 400
42
  },
43
  {
44
  "epoch": 4.35,
45
+ "learning_rate": 1.7395e-05,
46
+ "loss": 3.4258,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 5.22,
51
+ "learning_rate": 2.0894999999999996e-05,
52
+ "loss": 3.1653,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 5.22,
57
+ "eval_loss": 3.115588426589966,
58
+ "eval_runtime": 81.1622,
59
+ "eval_samples_per_second": 20.946,
60
+ "eval_steps_per_second": 1.318,
61
  "eval_wer": 1.0,
62
  "step": 600
63
  },
64
  {
65
  "epoch": 6.09,
66
+ "learning_rate": 2.4394999999999996e-05,
67
+ "loss": 3.0356,
68
  "step": 700
69
  },
70
  {
71
  "epoch": 6.96,
72
+ "learning_rate": 2.7895e-05,
73
+ "loss": 2.9791,
74
  "step": 800
75
  },
76
  {
77
  "epoch": 7.83,
78
+ "learning_rate": 3.1395e-05,
79
+ "loss": 2.8904,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 7.83,
84
+ "eval_loss": 2.842055320739746,
85
+ "eval_runtime": 84.1755,
86
+ "eval_samples_per_second": 20.196,
87
+ "eval_steps_per_second": 1.271,
88
+ "eval_wer": 0.9918110836031232,
89
  "step": 900
90
  },
91
  {
92
  "epoch": 8.7,
93
+ "learning_rate": 3.4895e-05,
94
+ "loss": 2.1422,
95
  "step": 1000
96
  },
97
  {
98
  "epoch": 9.57,
99
+ "learning_rate": 3.8394999999999994e-05,
100
+ "loss": 1.2257,
101
  "step": 1100
102
  },
103
  {
104
  "epoch": 10.43,
105
+ "learning_rate": 4.1895e-05,
106
+ "loss": 0.9207,
107
  "step": 1200
108
  },
109
  {
110
  "epoch": 10.43,
111
+ "eval_loss": 0.9894591569900513,
112
+ "eval_runtime": 82.3044,
113
+ "eval_samples_per_second": 20.655,
114
+ "eval_steps_per_second": 1.3,
115
+ "eval_wer": 0.8688503777058338,
116
  "step": 1200
117
  },
118
  {
119
  "epoch": 11.3,
120
+ "learning_rate": 4.5394999999999995e-05,
121
+ "loss": 0.7881,
122
  "step": 1300
123
  },
124
  {
125
  "epoch": 12.17,
126
+ "learning_rate": 4.8895e-05,
127
+ "loss": 0.7047,
128
  "step": 1400
129
  },
130
  {
131
  "epoch": 13.04,
132
+ "learning_rate": 5.2395e-05,
133
+ "loss": 0.6384,
134
  "step": 1500
135
  },
136
  {
137
  "epoch": 13.04,
138
+ "eval_loss": 0.6993927359580994,
139
+ "eval_runtime": 82.4631,
140
+ "eval_samples_per_second": 20.615,
141
+ "eval_steps_per_second": 1.298,
142
+ "eval_wer": 0.7700120611946931,
143
  "step": 1500
144
  },
145
  {
146
  "epoch": 13.91,
147
+ "learning_rate": 5.589499999999999e-05,
148
+ "loss": 0.5989,
149
  "step": 1600
150
  },
151
  {
152
  "epoch": 14.78,
153
+ "learning_rate": 5.9394999999999996e-05,
154
+ "loss": 0.5601,
155
  "step": 1700
156
  },
157
  {
158
  "epoch": 15.65,
159
+ "learning_rate": 6.289499999999999e-05,
160
+ "loss": 0.5215,
161
  "step": 1800
162
  },
163
  {
164
  "epoch": 15.65,
165
+ "eval_loss": 0.5627515912055969,
166
+ "eval_runtime": 81.3944,
167
+ "eval_samples_per_second": 20.886,
168
+ "eval_steps_per_second": 1.315,
169
+ "eval_wer": 0.6443217164984447,
170
  "step": 1800
171
  },
172
  {
173
  "epoch": 16.52,
174
+ "learning_rate": 6.639499999999999e-05,
175
+ "loss": 0.4949,
176
  "step": 1900
177
  },
178
  {
179
  "epoch": 17.39,
180
+ "learning_rate": 6.9895e-05,
181
+ "loss": 0.4869,
182
  "step": 2000
183
  },
184
  {
185
  "epoch": 18.26,
186
+ "learning_rate": 6.818933333333333e-05,
187
+ "loss": 0.4573,
188
  "step": 2100
189
  },
190
  {
191
  "epoch": 18.26,
192
+ "eval_loss": 0.5316212177276611,
193
+ "eval_runtime": 82.2602,
194
+ "eval_samples_per_second": 20.666,
195
+ "eval_steps_per_second": 1.301,
196
+ "eval_wer": 0.6174062083412684,
197
  "step": 2100
198
  },
199
  {
200
  "epoch": 19.13,
201
+ "learning_rate": 6.632266666666666e-05,
202
+ "loss": 0.438,
203
  "step": 2200
204
  },
205
  {
206
  "epoch": 20.0,
207
+ "learning_rate": 6.4456e-05,
208
+ "loss": 0.4153,
209
  "step": 2300
210
  },
211
  {
212
  "epoch": 20.87,
213
+ "learning_rate": 6.258933333333333e-05,
214
+ "loss": 0.3875,
215
  "step": 2400
216
  },
217
  {
218
  "epoch": 20.87,
219
+ "eval_loss": 0.4931696653366089,
220
+ "eval_runtime": 80.8801,
221
+ "eval_samples_per_second": 21.019,
222
+ "eval_steps_per_second": 1.323,
223
+ "eval_wer": 0.5778581857423982,
224
  "step": 2400
225
  },
226
  {
227
  "epoch": 21.74,
228
+ "learning_rate": 6.072266666666667e-05,
229
+ "loss": 0.3807,
230
  "step": 2500
231
  },
232
  {
233
  "epoch": 22.61,
234
+ "learning_rate": 5.8855999999999993e-05,
235
+ "loss": 0.3715,
236
  "step": 2600
237
  },
238
  {
239
  "epoch": 23.48,
240
+ "learning_rate": 5.6989333333333333e-05,
241
+ "loss": 0.3562,
242
  "step": 2700
243
  },
244
  {
245
  "epoch": 23.48,
246
+ "eval_loss": 0.4971640110015869,
247
+ "eval_runtime": 82.278,
248
+ "eval_samples_per_second": 20.662,
249
+ "eval_steps_per_second": 1.3,
250
+ "eval_wer": 0.547514759093506,
251
  "step": 2700
252
  },
253
  {
254
  "epoch": 24.35,
255
+ "learning_rate": 5.512266666666666e-05,
256
+ "loss": 0.3457,
257
  "step": 2800
258
  },
259
  {
260
  "epoch": 25.22,
261
+ "learning_rate": 5.3256e-05,
262
+ "loss": 0.3356,
263
  "step": 2900
264
  },
265
  {
266
  "epoch": 26.09,
267
+ "learning_rate": 5.1389333333333326e-05,
268
+ "loss": 0.3218,
269
  "step": 3000
270
  },
271
  {
272
  "epoch": 26.09,
273
+ "eval_loss": 0.4894775450229645,
274
+ "eval_runtime": 81.4053,
275
+ "eval_samples_per_second": 20.883,
276
+ "eval_steps_per_second": 1.314,
277
+ "eval_wer": 0.5219323303497746,
278
  "step": 3000
279
  },
280
  {
281
  "epoch": 26.96,
282
+ "learning_rate": 4.9522666666666666e-05,
283
+ "loss": 0.3072,
284
  "step": 3100
285
  },
286
  {
287
  "epoch": 27.83,
288
+ "learning_rate": 4.765599999999999e-05,
289
+ "loss": 0.3006,
290
  "step": 3200
291
  },
292
  {
293
  "epoch": 28.7,
294
+ "learning_rate": 4.578933333333333e-05,
295
+ "loss": 0.2954,
296
  "step": 3300
297
  },
298
  {
299
  "epoch": 28.7,
300
+ "eval_loss": 0.5226009488105774,
301
+ "eval_runtime": 82.2645,
302
+ "eval_samples_per_second": 20.665,
303
+ "eval_steps_per_second": 1.301,
304
+ "eval_wer": 0.5192026915508157,
305
  "step": 3300
306
  },
307
  {
308
  "epoch": 29.57,
309
+ "learning_rate": 4.392266666666666e-05,
310
+ "loss": 0.2965,
311
  "step": 3400
312
  },
313
  {
314
  "epoch": 30.43,
315
+ "learning_rate": 4.2056e-05,
316
+ "loss": 0.286,
317
  "step": 3500
318
  },
319
  {
320
  "epoch": 31.3,
321
+ "learning_rate": 4.018933333333333e-05,
322
+ "loss": 0.287,
323
  "step": 3600
324
  },
325
  {
326
  "epoch": 31.3,
327
+ "eval_loss": 0.495715469121933,
328
+ "eval_runtime": 79.9357,
329
+ "eval_samples_per_second": 21.267,
330
+ "eval_steps_per_second": 1.339,
331
+ "eval_wer": 0.5145686535897924,
332
  "step": 3600
333
  },
334
  {
335
  "epoch": 32.17,
336
+ "learning_rate": 3.8322666666666665e-05,
337
+ "loss": 0.2768,
338
  "step": 3700
339
  },
340
  {
341
  "epoch": 33.04,
342
+ "learning_rate": 3.6456e-05,
343
+ "loss": 0.2731,
344
  "step": 3800
345
  },
346
  {
347
  "epoch": 33.91,
348
+ "learning_rate": 3.458933333333333e-05,
349
+ "loss": 0.2587,
350
  "step": 3900
351
  },
352
  {
353
  "epoch": 33.91,
354
+ "eval_loss": 0.49437007308006287,
355
+ "eval_runtime": 82.64,
356
+ "eval_samples_per_second": 20.571,
357
+ "eval_steps_per_second": 1.295,
358
+ "eval_wer": 0.48930362470640515,
359
  "step": 3900
360
  },
361
  {
362
  "epoch": 34.78,
363
+ "learning_rate": 3.2722666666666664e-05,
364
+ "loss": 0.2549,
365
  "step": 4000
366
  },
367
  {
368
  "epoch": 35.65,
369
+ "learning_rate": 3.0856e-05,
370
+ "loss": 0.2538,
371
  "step": 4100
372
  },
373
  {
374
  "epoch": 36.52,
375
+ "learning_rate": 2.8989333333333334e-05,
376
+ "loss": 0.2496,
377
  "step": 4200
378
  },
379
  {
380
  "epoch": 36.52,
381
+ "eval_loss": 0.4975605010986328,
382
+ "eval_runtime": 82.1209,
383
+ "eval_samples_per_second": 20.701,
384
+ "eval_steps_per_second": 1.303,
385
+ "eval_wer": 0.4894940646226116,
386
  "step": 4200
387
  },
388
  {
389
  "epoch": 37.39,
390
+ "learning_rate": 2.7122666666666667e-05,
391
+ "loss": 0.2461,
392
  "step": 4300
393
  },
394
  {
395
  "epoch": 38.26,
396
+ "learning_rate": 2.5256e-05,
397
+ "loss": 0.2359,
398
  "step": 4400
399
  },
400
  {
401
  "epoch": 39.13,
402
+ "learning_rate": 2.3389333333333333e-05,
403
+ "loss": 0.2365,
404
  "step": 4500
405
  },
406
  {
407
  "epoch": 39.13,
408
+ "eval_loss": 0.5185123085975647,
409
+ "eval_runtime": 79.6732,
410
+ "eval_samples_per_second": 21.337,
411
+ "eval_steps_per_second": 1.343,
412
+ "eval_wer": 0.4818764679743541,
413
  "step": 4500
414
  },
415
  {
416
  "epoch": 40.0,
417
+ "learning_rate": 2.1522666666666666e-05,
418
+ "loss": 0.2357,
419
  "step": 4600
420
  },
421
  {
422
  "epoch": 40.87,
423
+ "learning_rate": 1.9656e-05,
424
+ "loss": 0.2289,
425
  "step": 4700
426
  },
427
  {
428
  "epoch": 41.74,
429
+ "learning_rate": 1.7789333333333333e-05,
430
+ "loss": 0.2264,
431
  "step": 4800
432
  },
433
  {
434
  "epoch": 41.74,
435
+ "eval_loss": 0.5152125954627991,
436
+ "eval_runtime": 79.201,
437
+ "eval_samples_per_second": 21.464,
438
+ "eval_steps_per_second": 1.351,
439
+ "eval_wer": 0.47755982987367485,
440
  "step": 4800
441
  },
442
  {
443
  "epoch": 42.61,
444
+ "learning_rate": 1.5922666666666666e-05,
445
+ "loss": 0.2211,
446
  "step": 4900
447
  },
448
  {
449
  "epoch": 43.48,
450
+ "learning_rate": 1.4055999999999999e-05,
451
+ "loss": 0.2186,
452
  "step": 5000
453
  },
454
  {
455
  "epoch": 44.35,
456
+ "learning_rate": 1.2189333333333332e-05,
457
+ "loss": 0.2224,
458
  "step": 5100
459
  },
460
  {
461
  "epoch": 44.35,
462
+ "eval_loss": 0.5030579566955566,
463
+ "eval_runtime": 80.9089,
464
+ "eval_samples_per_second": 21.011,
465
+ "eval_steps_per_second": 1.322,
466
+ "eval_wer": 0.4745762711864407,
467
  "step": 5100
468
  },
469
  {
470
  "epoch": 45.22,
471
+ "learning_rate": 1.0322666666666665e-05,
472
+ "loss": 0.2162,
473
  "step": 5200
474
  },
475
  {
476
  "epoch": 46.09,
477
+ "learning_rate": 8.456e-06,
478
+ "loss": 0.2159,
479
  "step": 5300
480
  },
481
  {
482
  "epoch": 46.96,
483
+ "learning_rate": 6.589333333333332e-06,
484
+ "loss": 0.2096,
485
  "step": 5400
486
  },
487
  {
488
  "epoch": 46.96,
489
+ "eval_loss": 0.5061585307121277,
490
+ "eval_runtime": 81.3005,
491
+ "eval_samples_per_second": 20.91,
492
+ "eval_steps_per_second": 1.316,
493
+ "eval_wer": 0.47076747286231196,
494
  "step": 5400
495
  },
496
  {
497
  "epoch": 47.83,
498
+ "learning_rate": 4.7226666666666654e-06,
499
+ "loss": 0.205,
500
  "step": 5500
501
  },
502
  {
503
  "epoch": 48.7,
504
+ "learning_rate": 2.856e-06,
505
+ "loss": 0.2038,
506
  "step": 5600
507
  },
508
  {
509
  "epoch": 49.57,
510
+ "learning_rate": 9.893333333333332e-07,
511
+ "loss": 0.2038,
512
  "step": 5700
513
  },
514
  {
515
  "epoch": 49.57,
516
+ "eval_loss": 0.5217297077178955,
517
+ "eval_runtime": 83.7172,
518
+ "eval_samples_per_second": 20.306,
519
+ "eval_steps_per_second": 1.278,
520
+ "eval_wer": 0.46981527328127975,
521
  "step": 5700
522
  },
523
  {
524
  "epoch": 50.0,
525
  "step": 5750,
526
  "total_flos": 2.9609940258263142e+19,
527
+ "train_loss": 1.123646092788033,
528
+ "train_runtime": 13730.8326,
529
+ "train_samples_per_second": 13.386,
530
+ "train_steps_per_second": 0.419
531
  }
532
  ],
533
  "max_steps": 5750,