bagasshw commited on
Commit
47efd17
·
verified ·
1 Parent(s): b5a2c51

End of training

Browse files
README.md CHANGED
@@ -1,15 +1,20 @@
1
  ---
2
  library_name: transformers
 
 
3
  license: apache-2.0
4
  base_model: openai/whisper-tiny
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  datasets:
8
  - jv_id_asr_split
9
  metrics:
10
  - wer
11
  model-index:
12
- - name: whisper-tiny-javanese-openslr-v3
13
  results:
14
  - task:
15
  name: Automatic Speech Recognition
@@ -29,7 +34,7 @@ model-index:
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
- # whisper-tiny-javanese-openslr-v3
33
 
34
  This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the jv_id_asr_split dataset.
35
  It achieves the following results on the evaluation set:
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - jv
5
  license: apache-2.0
6
  base_model: openai/whisper-tiny
7
  tags:
8
+ - whisper
9
+ - javanese
10
+ - asr
11
  - generated_from_trainer
12
  datasets:
13
  - jv_id_asr_split
14
  metrics:
15
  - wer
16
  model-index:
17
+ - name: Whisper-Tiny-Java-v3
18
  results:
19
  - task:
20
  name: Automatic Speech Recognition
 
34
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
35
  should probably proofread and complete it, then remove this comment. -->
36
 
37
+ # Whisper-Tiny-Java-v3
38
 
39
  This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the jv_id_asr_split dataset.
40
  It achieves the following results on the evaluation set:
all_results.json CHANGED
@@ -1,8 +1,21 @@
1
  {
 
 
 
 
 
 
 
2
  "pretrained_eval_loss": 4.039339065551758,
3
  "pretrained_eval_model_preparation_time": 0.0024,
4
  "pretrained_eval_runtime": 5840.2863,
5
  "pretrained_eval_samples_per_second": 3.168,
6
  "pretrained_eval_steps_per_second": 0.792,
7
- "pretrained_eval_wer": 1.301261348618616
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 1.0807262117036798,
3
+ "eval_loss": 0.2979792058467865,
4
+ "eval_runtime": 3148.6396,
5
+ "eval_samples": 18504,
6
+ "eval_samples_per_second": 5.877,
7
+ "eval_steps_per_second": 1.469,
8
+ "eval_wer": 0.2586507557925852,
9
  "pretrained_eval_loss": 4.039339065551758,
10
  "pretrained_eval_model_preparation_time": 0.0024,
11
  "pretrained_eval_runtime": 5840.2863,
12
  "pretrained_eval_samples_per_second": 3.168,
13
  "pretrained_eval_steps_per_second": 0.792,
14
+ "pretrained_eval_wer": 1.301261348618616,
15
+ "total_flos": 3.93912009474048e+18,
16
+ "train_loss": 0.09938106536865235,
17
+ "train_runtime": 25030.863,
18
+ "train_samples": 148052,
19
+ "train_samples_per_second": 6.392,
20
+ "train_steps_per_second": 0.4
21
  }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0807262117036798,
3
+ "eval_loss": 0.2979792058467865,
4
+ "eval_runtime": 3148.6396,
5
+ "eval_samples": 18504,
6
+ "eval_samples_per_second": 5.877,
7
+ "eval_steps_per_second": 1.469,
8
+ "eval_wer": 0.2586507557925852
9
+ }
runs/Mar16_11-31-27_dgx-a100/events.out.tfevents.1742127702.dgx-a100.697190.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:966f22d843e92926febc2e74c35913c0b321d884675ffde7467ee6f6147e4eca
3
+ size 406
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0807262117036798,
3
+ "total_flos": 3.93912009474048e+18,
4
+ "train_loss": 0.09938106536865235,
5
+ "train_runtime": 25030.863,
6
+ "train_samples": 148052,
7
+ "train_samples_per_second": 6.392,
8
+ "train_steps_per_second": 0.4
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,923 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 10000,
3
+ "best_metric": 0.2586507557925852,
4
+ "best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v3/checkpoint-10000",
5
+ "epoch": 1.0807262117036798,
6
+ "eval_steps": 500,
7
+ "global_step": 10000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.01080672178094775,
14
+ "grad_norm": 25.346445083618164,
15
+ "learning_rate": 1.94e-06,
16
+ "loss": 3.5433,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.0216134435618955,
21
+ "grad_norm": 21.656307220458984,
22
+ "learning_rate": 3.94e-06,
23
+ "loss": 2.0264,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.03242016534284325,
28
+ "grad_norm": 18.657211303710938,
29
+ "learning_rate": 5.94e-06,
30
+ "loss": 1.5688,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.043226887123791,
35
+ "grad_norm": 16.42237663269043,
36
+ "learning_rate": 7.94e-06,
37
+ "loss": 1.3214,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.054033608904738746,
42
+ "grad_norm": 18.631206512451172,
43
+ "learning_rate": 9.940000000000001e-06,
44
+ "loss": 1.1788,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.054033608904738746,
49
+ "eval_loss": 0.967095136642456,
50
+ "eval_runtime": 5770.5819,
51
+ "eval_samples_per_second": 3.207,
52
+ "eval_steps_per_second": 0.802,
53
+ "eval_wer": 0.6590292385770924,
54
+ "step": 500
55
+ },
56
+ {
57
+ "epoch": 0.0648403306856865,
58
+ "grad_norm": 15.337555885314941,
59
+ "learning_rate": 1.1940000000000001e-05,
60
+ "loss": 1.0627,
61
+ "step": 600
62
+ },
63
+ {
64
+ "epoch": 0.07564705246663425,
65
+ "grad_norm": 14.623177528381348,
66
+ "learning_rate": 1.394e-05,
67
+ "loss": 0.9632,
68
+ "step": 700
69
+ },
70
+ {
71
+ "epoch": 0.086453774247582,
72
+ "grad_norm": 17.126712799072266,
73
+ "learning_rate": 1.5940000000000003e-05,
74
+ "loss": 0.906,
75
+ "step": 800
76
+ },
77
+ {
78
+ "epoch": 0.09726049602852975,
79
+ "grad_norm": 16.75067710876465,
80
+ "learning_rate": 1.794e-05,
81
+ "loss": 0.8503,
82
+ "step": 900
83
+ },
84
+ {
85
+ "epoch": 0.10806721780947749,
86
+ "grad_norm": 14.265076637268066,
87
+ "learning_rate": 1.9940000000000002e-05,
88
+ "loss": 0.8015,
89
+ "step": 1000
90
+ },
91
+ {
92
+ "epoch": 0.10806721780947749,
93
+ "eval_loss": 0.6976613402366638,
94
+ "eval_runtime": 5463.0331,
95
+ "eval_samples_per_second": 3.387,
96
+ "eval_steps_per_second": 0.847,
97
+ "eval_wer": 0.5304858499049883,
98
+ "step": 1000
99
+ },
100
+ {
101
+ "epoch": 0.11887393959042525,
102
+ "grad_norm": 13.737130165100098,
103
+ "learning_rate": 1.9784444444444446e-05,
104
+ "loss": 0.7589,
105
+ "step": 1100
106
+ },
107
+ {
108
+ "epoch": 0.129680661371373,
109
+ "grad_norm": 18.01378631591797,
110
+ "learning_rate": 1.9562222222222225e-05,
111
+ "loss": 0.7589,
112
+ "step": 1200
113
+ },
114
+ {
115
+ "epoch": 0.14048738315232073,
116
+ "grad_norm": 11.696120262145996,
117
+ "learning_rate": 1.934e-05,
118
+ "loss": 0.7087,
119
+ "step": 1300
120
+ },
121
+ {
122
+ "epoch": 0.1512941049332685,
123
+ "grad_norm": 13.419560432434082,
124
+ "learning_rate": 1.911777777777778e-05,
125
+ "loss": 0.683,
126
+ "step": 1400
127
+ },
128
+ {
129
+ "epoch": 0.16210082671421625,
130
+ "grad_norm": 12.753211975097656,
131
+ "learning_rate": 1.8895555555555557e-05,
132
+ "loss": 0.6498,
133
+ "step": 1500
134
+ },
135
+ {
136
+ "epoch": 0.16210082671421625,
137
+ "eval_loss": 0.5724753737449646,
138
+ "eval_runtime": 4564.4621,
139
+ "eval_samples_per_second": 4.054,
140
+ "eval_steps_per_second": 1.013,
141
+ "eval_wer": 0.6670133485560569,
142
+ "step": 1500
143
+ },
144
+ {
145
+ "epoch": 0.172907548495164,
146
+ "grad_norm": 11.64907455444336,
147
+ "learning_rate": 1.8673333333333333e-05,
148
+ "loss": 0.6216,
149
+ "step": 1600
150
+ },
151
+ {
152
+ "epoch": 0.18371427027611173,
153
+ "grad_norm": 13.781865119934082,
154
+ "learning_rate": 1.8451111111111113e-05,
155
+ "loss": 0.6138,
156
+ "step": 1700
157
+ },
158
+ {
159
+ "epoch": 0.1945209920570595,
160
+ "grad_norm": 12.58388900756836,
161
+ "learning_rate": 1.822888888888889e-05,
162
+ "loss": 0.595,
163
+ "step": 1800
164
+ },
165
+ {
166
+ "epoch": 0.20532771383800724,
167
+ "grad_norm": 14.661055564880371,
168
+ "learning_rate": 1.8006666666666668e-05,
169
+ "loss": 0.5938,
170
+ "step": 1900
171
+ },
172
+ {
173
+ "epoch": 0.21613443561895498,
174
+ "grad_norm": 11.948161125183105,
175
+ "learning_rate": 1.7784444444444448e-05,
176
+ "loss": 0.5828,
177
+ "step": 2000
178
+ },
179
+ {
180
+ "epoch": 0.21613443561895498,
181
+ "eval_loss": 0.5093731880187988,
182
+ "eval_runtime": 5328.6402,
183
+ "eval_samples_per_second": 3.473,
184
+ "eval_steps_per_second": 0.868,
185
+ "eval_wer": 0.4828939857208768,
186
+ "step": 2000
187
+ },
188
+ {
189
+ "epoch": 0.22694115739990273,
190
+ "grad_norm": 12.322188377380371,
191
+ "learning_rate": 1.7562222222222224e-05,
192
+ "loss": 0.5752,
193
+ "step": 2100
194
+ },
195
+ {
196
+ "epoch": 0.2377478791808505,
197
+ "grad_norm": 17.046159744262695,
198
+ "learning_rate": 1.734e-05,
199
+ "loss": 0.5663,
200
+ "step": 2200
201
+ },
202
+ {
203
+ "epoch": 0.24855460096179824,
204
+ "grad_norm": 10.154263496398926,
205
+ "learning_rate": 1.711777777777778e-05,
206
+ "loss": 0.537,
207
+ "step": 2300
208
+ },
209
+ {
210
+ "epoch": 0.259361322742746,
211
+ "grad_norm": 11.958285331726074,
212
+ "learning_rate": 1.6895555555555556e-05,
213
+ "loss": 0.5246,
214
+ "step": 2400
215
+ },
216
+ {
217
+ "epoch": 0.27016804452369375,
218
+ "grad_norm": 10.264266014099121,
219
+ "learning_rate": 1.6673333333333335e-05,
220
+ "loss": 0.5226,
221
+ "step": 2500
222
+ },
223
+ {
224
+ "epoch": 0.27016804452369375,
225
+ "eval_loss": 0.46415480971336365,
226
+ "eval_runtime": 4645.745,
227
+ "eval_samples_per_second": 3.983,
228
+ "eval_steps_per_second": 0.996,
229
+ "eval_wer": 0.38602898052064843,
230
+ "step": 2500
231
+ },
232
+ {
233
+ "epoch": 0.28097476630464147,
234
+ "grad_norm": 12.049257278442383,
235
+ "learning_rate": 1.6451111111111115e-05,
236
+ "loss": 0.493,
237
+ "step": 2600
238
+ },
239
+ {
240
+ "epoch": 0.29178148808558924,
241
+ "grad_norm": 9.821508407592773,
242
+ "learning_rate": 1.622888888888889e-05,
243
+ "loss": 0.5153,
244
+ "step": 2700
245
+ },
246
+ {
247
+ "epoch": 0.302588209866537,
248
+ "grad_norm": 10.481095314025879,
249
+ "learning_rate": 1.6006666666666667e-05,
250
+ "loss": 0.5,
251
+ "step": 2800
252
+ },
253
+ {
254
+ "epoch": 0.3133949316474847,
255
+ "grad_norm": 10.193309783935547,
256
+ "learning_rate": 1.5784444444444447e-05,
257
+ "loss": 0.5248,
258
+ "step": 2900
259
+ },
260
+ {
261
+ "epoch": 0.3242016534284325,
262
+ "grad_norm": 12.328668594360352,
263
+ "learning_rate": 1.5562222222222223e-05,
264
+ "loss": 0.4955,
265
+ "step": 3000
266
+ },
267
+ {
268
+ "epoch": 0.3242016534284325,
269
+ "eval_loss": 0.4340818226337433,
270
+ "eval_runtime": 4456.9484,
271
+ "eval_samples_per_second": 4.152,
272
+ "eval_steps_per_second": 1.038,
273
+ "eval_wer": 0.39154200455117727,
274
+ "step": 3000
275
+ },
276
+ {
277
+ "epoch": 0.33500837520938026,
278
+ "grad_norm": 12.583343505859375,
279
+ "learning_rate": 1.5340000000000002e-05,
280
+ "loss": 0.5082,
281
+ "step": 3100
282
+ },
283
+ {
284
+ "epoch": 0.345815096990328,
285
+ "grad_norm": 8.40932846069336,
286
+ "learning_rate": 1.511777777777778e-05,
287
+ "loss": 0.4905,
288
+ "step": 3200
289
+ },
290
+ {
291
+ "epoch": 0.35662181877127574,
292
+ "grad_norm": 14.150980949401855,
293
+ "learning_rate": 1.4895555555555556e-05,
294
+ "loss": 0.466,
295
+ "step": 3300
296
+ },
297
+ {
298
+ "epoch": 0.36742854055222346,
299
+ "grad_norm": 13.014771461486816,
300
+ "learning_rate": 1.4673333333333336e-05,
301
+ "loss": 0.4788,
302
+ "step": 3400
303
+ },
304
+ {
305
+ "epoch": 0.37823526233317123,
306
+ "grad_norm": 11.843710899353027,
307
+ "learning_rate": 1.4451111111111112e-05,
308
+ "loss": 0.4616,
309
+ "step": 3500
310
+ },
311
+ {
312
+ "epoch": 0.37823526233317123,
313
+ "eval_loss": 0.4127795398235321,
314
+ "eval_runtime": 4528.1925,
315
+ "eval_samples_per_second": 4.086,
316
+ "eval_steps_per_second": 1.022,
317
+ "eval_wer": 0.35399088200564593,
318
+ "step": 3500
319
+ },
320
+ {
321
+ "epoch": 0.389041984114119,
322
+ "grad_norm": 11.520469665527344,
323
+ "learning_rate": 1.422888888888889e-05,
324
+ "loss": 0.4695,
325
+ "step": 3600
326
+ },
327
+ {
328
+ "epoch": 0.3998487058950667,
329
+ "grad_norm": 10.21032428741455,
330
+ "learning_rate": 1.400666666666667e-05,
331
+ "loss": 0.47,
332
+ "step": 3700
333
+ },
334
+ {
335
+ "epoch": 0.4106554276760145,
336
+ "grad_norm": 9.393896102905273,
337
+ "learning_rate": 1.3784444444444445e-05,
338
+ "loss": 0.4656,
339
+ "step": 3800
340
+ },
341
+ {
342
+ "epoch": 0.42146214945696225,
343
+ "grad_norm": 10.503016471862793,
344
+ "learning_rate": 1.3562222222222223e-05,
345
+ "loss": 0.4446,
346
+ "step": 3900
347
+ },
348
+ {
349
+ "epoch": 0.43226887123790997,
350
+ "grad_norm": 10.747596740722656,
351
+ "learning_rate": 1.3340000000000001e-05,
352
+ "loss": 0.4474,
353
+ "step": 4000
354
+ },
355
+ {
356
+ "epoch": 0.43226887123790997,
357
+ "eval_loss": 0.3900074064731598,
358
+ "eval_runtime": 4858.8536,
359
+ "eval_samples_per_second": 3.808,
360
+ "eval_steps_per_second": 0.952,
361
+ "eval_wer": 0.36136504038974343,
362
+ "step": 4000
363
+ },
364
+ {
365
+ "epoch": 0.44307559301885774,
366
+ "grad_norm": 13.275285720825195,
367
+ "learning_rate": 1.3117777777777779e-05,
368
+ "loss": 0.4488,
369
+ "step": 4100
370
+ },
371
+ {
372
+ "epoch": 0.45388231479980545,
373
+ "grad_norm": 11.318832397460938,
374
+ "learning_rate": 1.2897777777777778e-05,
375
+ "loss": 0.4292,
376
+ "step": 4200
377
+ },
378
+ {
379
+ "epoch": 0.4646890365807532,
380
+ "grad_norm": 10.3064546585083,
381
+ "learning_rate": 1.2675555555555557e-05,
382
+ "loss": 0.4302,
383
+ "step": 4300
384
+ },
385
+ {
386
+ "epoch": 0.475495758361701,
387
+ "grad_norm": 11.634562492370605,
388
+ "learning_rate": 1.2453333333333335e-05,
389
+ "loss": 0.426,
390
+ "step": 4400
391
+ },
392
+ {
393
+ "epoch": 0.4863024801426487,
394
+ "grad_norm": 10.647918701171875,
395
+ "learning_rate": 1.2231111111111111e-05,
396
+ "loss": 0.4387,
397
+ "step": 4500
398
+ },
399
+ {
400
+ "epoch": 0.4863024801426487,
401
+ "eval_loss": 0.37359631061553955,
402
+ "eval_runtime": 4990.4878,
403
+ "eval_samples_per_second": 3.708,
404
+ "eval_steps_per_second": 0.927,
405
+ "eval_wer": 0.35633684967821144,
406
+ "step": 4500
407
+ },
408
+ {
409
+ "epoch": 0.4971092019235965,
410
+ "grad_norm": 9.396610260009766,
411
+ "learning_rate": 1.200888888888889e-05,
412
+ "loss": 0.4195,
413
+ "step": 4600
414
+ },
415
+ {
416
+ "epoch": 0.5079159237045442,
417
+ "grad_norm": 10.845105171203613,
418
+ "learning_rate": 1.1786666666666668e-05,
419
+ "loss": 0.4056,
420
+ "step": 4700
421
+ },
422
+ {
423
+ "epoch": 0.518722645485492,
424
+ "grad_norm": 9.404190063476562,
425
+ "learning_rate": 1.1564444444444445e-05,
426
+ "loss": 0.4306,
427
+ "step": 4800
428
+ },
429
+ {
430
+ "epoch": 0.5295293672664397,
431
+ "grad_norm": 9.176289558410645,
432
+ "learning_rate": 1.1342222222222224e-05,
433
+ "loss": 0.4239,
434
+ "step": 4900
435
+ },
436
+ {
437
+ "epoch": 0.5403360890473875,
438
+ "grad_norm": 10.088706016540527,
439
+ "learning_rate": 1.1120000000000002e-05,
440
+ "loss": 0.4154,
441
+ "step": 5000
442
+ },
443
+ {
444
+ "epoch": 0.5403360890473875,
445
+ "eval_loss": 0.36057594418525696,
446
+ "eval_runtime": 5945.658,
447
+ "eval_samples_per_second": 3.112,
448
+ "eval_steps_per_second": 0.778,
449
+ "eval_wer": 0.32743452795220485,
450
+ "step": 5000
451
+ },
452
+ {
453
+ "epoch": 0.5511428108283353,
454
+ "grad_norm": 9.688194274902344,
455
+ "learning_rate": 1.0897777777777778e-05,
456
+ "loss": 0.4115,
457
+ "step": 5100
458
+ },
459
+ {
460
+ "epoch": 0.5619495326092829,
461
+ "grad_norm": 9.752260208129883,
462
+ "learning_rate": 1.0675555555555558e-05,
463
+ "loss": 0.3854,
464
+ "step": 5200
465
+ },
466
+ {
467
+ "epoch": 0.5727562543902307,
468
+ "grad_norm": 10.447392463684082,
469
+ "learning_rate": 1.0453333333333334e-05,
470
+ "loss": 0.4141,
471
+ "step": 5300
472
+ },
473
+ {
474
+ "epoch": 0.5835629761711785,
475
+ "grad_norm": 11.185776710510254,
476
+ "learning_rate": 1.0231111111111112e-05,
477
+ "loss": 0.3924,
478
+ "step": 5400
479
+ },
480
+ {
481
+ "epoch": 0.5943696979521262,
482
+ "grad_norm": 10.3914794921875,
483
+ "learning_rate": 1.000888888888889e-05,
484
+ "loss": 0.419,
485
+ "step": 5500
486
+ },
487
+ {
488
+ "epoch": 0.5943696979521262,
489
+ "eval_loss": 0.3494803309440613,
490
+ "eval_runtime": 6902.9208,
491
+ "eval_samples_per_second": 2.681,
492
+ "eval_steps_per_second": 0.67,
493
+ "eval_wer": 0.314375307908257,
494
+ "step": 5500
495
+ },
496
+ {
497
+ "epoch": 0.605176419733074,
498
+ "grad_norm": 11.420536041259766,
499
+ "learning_rate": 9.786666666666667e-06,
500
+ "loss": 0.4096,
501
+ "step": 5600
502
+ },
503
+ {
504
+ "epoch": 0.6159831415140217,
505
+ "grad_norm": 9.05328369140625,
506
+ "learning_rate": 9.564444444444445e-06,
507
+ "loss": 0.3917,
508
+ "step": 5700
509
+ },
510
+ {
511
+ "epoch": 0.6267898632949694,
512
+ "grad_norm": 10.281911849975586,
513
+ "learning_rate": 9.342222222222223e-06,
514
+ "loss": 0.3965,
515
+ "step": 5800
516
+ },
517
+ {
518
+ "epoch": 0.6375965850759172,
519
+ "grad_norm": 10.587265014648438,
520
+ "learning_rate": 9.12e-06,
521
+ "loss": 0.374,
522
+ "step": 5900
523
+ },
524
+ {
525
+ "epoch": 0.648403306856865,
526
+ "grad_norm": 7.721372127532959,
527
+ "learning_rate": 8.897777777777779e-06,
528
+ "loss": 0.3799,
529
+ "step": 6000
530
+ },
531
+ {
532
+ "epoch": 0.648403306856865,
533
+ "eval_loss": 0.3397567868232727,
534
+ "eval_runtime": 7002.8513,
535
+ "eval_samples_per_second": 2.642,
536
+ "eval_steps_per_second": 0.661,
537
+ "eval_wer": 0.2921668139413039,
538
+ "step": 6000
539
+ },
540
+ {
541
+ "epoch": 0.6592100286378128,
542
+ "grad_norm": 6.785597324371338,
543
+ "learning_rate": 8.675555555555556e-06,
544
+ "loss": 0.3953,
545
+ "step": 6100
546
+ },
547
+ {
548
+ "epoch": 0.6700167504187605,
549
+ "grad_norm": 9.53781509399414,
550
+ "learning_rate": 8.453333333333334e-06,
551
+ "loss": 0.3786,
552
+ "step": 6200
553
+ },
554
+ {
555
+ "epoch": 0.6808234721997082,
556
+ "grad_norm": 8.857239723205566,
557
+ "learning_rate": 8.231111111111112e-06,
558
+ "loss": 0.3744,
559
+ "step": 6300
560
+ },
561
+ {
562
+ "epoch": 0.691630193980656,
563
+ "grad_norm": 9.638261795043945,
564
+ "learning_rate": 8.00888888888889e-06,
565
+ "loss": 0.3809,
566
+ "step": 6400
567
+ },
568
+ {
569
+ "epoch": 0.7024369157616037,
570
+ "grad_norm": 8.304004669189453,
571
+ "learning_rate": 7.786666666666666e-06,
572
+ "loss": 0.3802,
573
+ "step": 6500
574
+ },
575
+ {
576
+ "epoch": 0.7024369157616037,
577
+ "eval_loss": 0.3289755880832672,
578
+ "eval_runtime": 5885.8991,
579
+ "eval_samples_per_second": 3.144,
580
+ "eval_steps_per_second": 0.786,
581
+ "eval_wer": 0.3044049452998538,
582
+ "step": 6500
583
+ },
584
+ {
585
+ "epoch": 0.7132436375425515,
586
+ "grad_norm": 9.978581428527832,
587
+ "learning_rate": 7.564444444444446e-06,
588
+ "loss": 0.3537,
589
+ "step": 6600
590
+ },
591
+ {
592
+ "epoch": 0.7240503593234993,
593
+ "grad_norm": 10.849929809570312,
594
+ "learning_rate": 7.342222222222223e-06,
595
+ "loss": 0.3762,
596
+ "step": 6700
597
+ },
598
+ {
599
+ "epoch": 0.7348570811044469,
600
+ "grad_norm": 11.856138229370117,
601
+ "learning_rate": 7.1200000000000004e-06,
602
+ "loss": 0.3477,
603
+ "step": 6800
604
+ },
605
+ {
606
+ "epoch": 0.7456638028853947,
607
+ "grad_norm": 10.761491775512695,
608
+ "learning_rate": 6.897777777777779e-06,
609
+ "loss": 0.361,
610
+ "step": 6900
611
+ },
612
+ {
613
+ "epoch": 0.7564705246663425,
614
+ "grad_norm": 9.24421501159668,
615
+ "learning_rate": 6.675555555555556e-06,
616
+ "loss": 0.3611,
617
+ "step": 7000
618
+ },
619
+ {
620
+ "epoch": 0.7564705246663425,
621
+ "eval_loss": 0.3224972188472748,
622
+ "eval_runtime": 5632.6127,
623
+ "eval_samples_per_second": 3.285,
624
+ "eval_steps_per_second": 0.821,
625
+ "eval_wer": 0.2823372093932546,
626
+ "step": 7000
627
+ },
628
+ {
629
+ "epoch": 0.7672772464472902,
630
+ "grad_norm": 10.52470874786377,
631
+ "learning_rate": 6.453333333333334e-06,
632
+ "loss": 0.3638,
633
+ "step": 7100
634
+ },
635
+ {
636
+ "epoch": 0.778083968228238,
637
+ "grad_norm": 9.080463409423828,
638
+ "learning_rate": 6.231111111111111e-06,
639
+ "loss": 0.3532,
640
+ "step": 7200
641
+ },
642
+ {
643
+ "epoch": 0.7888906900091858,
644
+ "grad_norm": 8.789374351501465,
645
+ "learning_rate": 6.00888888888889e-06,
646
+ "loss": 0.3592,
647
+ "step": 7300
648
+ },
649
+ {
650
+ "epoch": 0.7996974117901334,
651
+ "grad_norm": 8.97732162475586,
652
+ "learning_rate": 5.7866666666666674e-06,
653
+ "loss": 0.3611,
654
+ "step": 7400
655
+ },
656
+ {
657
+ "epoch": 0.8105041335710812,
658
+ "grad_norm": 10.455592155456543,
659
+ "learning_rate": 5.5644444444444444e-06,
660
+ "loss": 0.3548,
661
+ "step": 7500
662
+ },
663
+ {
664
+ "epoch": 0.8105041335710812,
665
+ "eval_loss": 0.31678903102874756,
666
+ "eval_runtime": 3060.9871,
667
+ "eval_samples_per_second": 6.045,
668
+ "eval_steps_per_second": 1.511,
669
+ "eval_wer": 0.27332869353060313,
670
+ "step": 7500
671
+ },
672
+ {
673
+ "epoch": 0.821310855352029,
674
+ "grad_norm": 8.56920051574707,
675
+ "learning_rate": 5.342222222222223e-06,
676
+ "loss": 0.3628,
677
+ "step": 7600
678
+ },
679
+ {
680
+ "epoch": 0.8321175771329767,
681
+ "grad_norm": 11.37761402130127,
682
+ "learning_rate": 5.12e-06,
683
+ "loss": 0.3353,
684
+ "step": 7700
685
+ },
686
+ {
687
+ "epoch": 0.8429242989139245,
688
+ "grad_norm": 9.396086692810059,
689
+ "learning_rate": 4.897777777777778e-06,
690
+ "loss": 0.3704,
691
+ "step": 7800
692
+ },
693
+ {
694
+ "epoch": 0.8537310206948722,
695
+ "grad_norm": 10.0977144241333,
696
+ "learning_rate": 4.677777777777778e-06,
697
+ "loss": 0.364,
698
+ "step": 7900
699
+ },
700
+ {
701
+ "epoch": 0.8645377424758199,
702
+ "grad_norm": 8.653088569641113,
703
+ "learning_rate": 4.455555555555555e-06,
704
+ "loss": 0.346,
705
+ "step": 8000
706
+ },
707
+ {
708
+ "epoch": 0.8645377424758199,
709
+ "eval_loss": 0.3104597330093384,
710
+ "eval_runtime": 3053.8514,
711
+ "eval_samples_per_second": 6.059,
712
+ "eval_steps_per_second": 1.515,
713
+ "eval_wer": 0.26601709428444076,
714
+ "step": 8000
715
+ },
716
+ {
717
+ "epoch": 0.8753444642567677,
718
+ "grad_norm": 9.058122634887695,
719
+ "learning_rate": 4.233333333333334e-06,
720
+ "loss": 0.3382,
721
+ "step": 8100
722
+ },
723
+ {
724
+ "epoch": 0.8861511860377155,
725
+ "grad_norm": 12.135452270507812,
726
+ "learning_rate": 4.011111111111111e-06,
727
+ "loss": 0.3456,
728
+ "step": 8200
729
+ },
730
+ {
731
+ "epoch": 0.8969579078186632,
732
+ "grad_norm": 6.601293563842773,
733
+ "learning_rate": 3.7888888888888893e-06,
734
+ "loss": 0.3404,
735
+ "step": 8300
736
+ },
737
+ {
738
+ "epoch": 0.9077646295996109,
739
+ "grad_norm": 9.51930046081543,
740
+ "learning_rate": 3.566666666666667e-06,
741
+ "loss": 0.3479,
742
+ "step": 8400
743
+ },
744
+ {
745
+ "epoch": 0.9185713513805587,
746
+ "grad_norm": 7.031350135803223,
747
+ "learning_rate": 3.3444444444444445e-06,
748
+ "loss": 0.3547,
749
+ "step": 8500
750
+ },
751
+ {
752
+ "epoch": 0.9185713513805587,
753
+ "eval_loss": 0.3063461184501648,
754
+ "eval_runtime": 3070.7291,
755
+ "eval_samples_per_second": 6.026,
756
+ "eval_steps_per_second": 1.506,
757
+ "eval_wer": 0.27081068822871623,
758
+ "step": 8500
759
+ },
760
+ {
761
+ "epoch": 0.9293780731615064,
762
+ "grad_norm": 11.10822868347168,
763
+ "learning_rate": 3.1222222222222228e-06,
764
+ "loss": 0.3454,
765
+ "step": 8600
766
+ },
767
+ {
768
+ "epoch": 0.9401847949424542,
769
+ "grad_norm": 9.607211112976074,
770
+ "learning_rate": 2.9e-06,
771
+ "loss": 0.3319,
772
+ "step": 8700
773
+ },
774
+ {
775
+ "epoch": 0.950991516723402,
776
+ "grad_norm": 10.614663124084473,
777
+ "learning_rate": 2.677777777777778e-06,
778
+ "loss": 0.3441,
779
+ "step": 8800
780
+ },
781
+ {
782
+ "epoch": 0.9617982385043498,
783
+ "grad_norm": 8.344138145446777,
784
+ "learning_rate": 2.455555555555556e-06,
785
+ "loss": 0.3466,
786
+ "step": 8900
787
+ },
788
+ {
789
+ "epoch": 0.9726049602852974,
790
+ "grad_norm": 11.955930709838867,
791
+ "learning_rate": 2.2333333333333333e-06,
792
+ "loss": 0.3211,
793
+ "step": 9000
794
+ },
795
+ {
796
+ "epoch": 0.9726049602852974,
797
+ "eval_loss": 0.30189329385757446,
798
+ "eval_runtime": 3095.8164,
799
+ "eval_samples_per_second": 5.977,
800
+ "eval_steps_per_second": 1.494,
801
+ "eval_wer": 0.28268910454413937,
802
+ "step": 9000
803
+ },
804
+ {
805
+ "epoch": 0.9834116820662452,
806
+ "grad_norm": 9.438616752624512,
807
+ "learning_rate": 2.011111111111111e-06,
808
+ "loss": 0.343,
809
+ "step": 9100
810
+ },
811
+ {
812
+ "epoch": 0.994218403847193,
813
+ "grad_norm": 10.029309272766113,
814
+ "learning_rate": 1.788888888888889e-06,
815
+ "loss": 0.3582,
816
+ "step": 9200
817
+ },
818
+ {
819
+ "epoch": 1.0050791592370454,
820
+ "grad_norm": 9.47360610961914,
821
+ "learning_rate": 1.566666666666667e-06,
822
+ "loss": 0.3024,
823
+ "step": 9300
824
+ },
825
+ {
826
+ "epoch": 1.0158858810179932,
827
+ "grad_norm": 9.3403959274292,
828
+ "learning_rate": 1.3444444444444446e-06,
829
+ "loss": 0.2811,
830
+ "step": 9400
831
+ },
832
+ {
833
+ "epoch": 1.026692602798941,
834
+ "grad_norm": 9.723664283752441,
835
+ "learning_rate": 1.1222222222222222e-06,
836
+ "loss": 0.2718,
837
+ "step": 9500
838
+ },
839
+ {
840
+ "epoch": 1.026692602798941,
841
+ "eval_loss": 0.2989746034145355,
842
+ "eval_runtime": 3189.5179,
843
+ "eval_samples_per_second": 5.802,
844
+ "eval_steps_per_second": 1.45,
845
+ "eval_wer": 0.2659936346077151,
846
+ "step": 9500
847
+ },
848
+ {
849
+ "epoch": 1.0374993245798887,
850
+ "grad_norm": 7.739469051361084,
851
+ "learning_rate": 9.000000000000001e-07,
852
+ "loss": 0.2765,
853
+ "step": 9600
854
+ },
855
+ {
856
+ "epoch": 1.0483060463608365,
857
+ "grad_norm": 8.379693984985352,
858
+ "learning_rate": 6.777777777777779e-07,
859
+ "loss": 0.2872,
860
+ "step": 9700
861
+ },
862
+ {
863
+ "epoch": 1.0591127681417842,
864
+ "grad_norm": 8.849838256835938,
865
+ "learning_rate": 4.5555555555555563e-07,
866
+ "loss": 0.2782,
867
+ "step": 9800
868
+ },
869
+ {
870
+ "epoch": 1.069919489922732,
871
+ "grad_norm": 8.006597518920898,
872
+ "learning_rate": 2.3333333333333336e-07,
873
+ "loss": 0.2673,
874
+ "step": 9900
875
+ },
876
+ {
877
+ "epoch": 1.0807262117036798,
878
+ "grad_norm": 10.859480857849121,
879
+ "learning_rate": 1.1111111111111112e-08,
880
+ "loss": 0.2859,
881
+ "step": 10000
882
+ },
883
+ {
884
+ "epoch": 1.0807262117036798,
885
+ "eval_loss": 0.2979792058467865,
886
+ "eval_runtime": 3174.7879,
887
+ "eval_samples_per_second": 5.828,
888
+ "eval_steps_per_second": 1.457,
889
+ "eval_wer": 0.2586507557925852,
890
+ "step": 10000
891
+ },
892
+ {
893
+ "epoch": 1.0807262117036798,
894
+ "step": 10000,
895
+ "total_flos": 3.93912009474048e+18,
896
+ "train_loss": 0.09938106536865235,
897
+ "train_runtime": 25030.863,
898
+ "train_samples_per_second": 6.392,
899
+ "train_steps_per_second": 0.4
900
+ }
901
+ ],
902
+ "logging_steps": 100,
903
+ "max_steps": 10000,
904
+ "num_input_tokens_seen": 0,
905
+ "num_train_epochs": 2,
906
+ "save_steps": 500,
907
+ "stateful_callbacks": {
908
+ "TrainerControl": {
909
+ "args": {
910
+ "should_epoch_stop": false,
911
+ "should_evaluate": false,
912
+ "should_log": false,
913
+ "should_save": true,
914
+ "should_training_stop": true
915
+ },
916
+ "attributes": {}
917
+ }
918
+ },
919
+ "total_flos": 3.93912009474048e+18,
920
+ "train_batch_size": 8,
921
+ "trial_name": null,
922
+ "trial_params": null
923
+ }