End of training

Files changed:
- README.md +17 -4
- all_results.json +11 -10
- eval_results.json +6 -6
- train_results.json +6 -5
- trainer_state.json +1707 -623
- wandb/run-20250212_152709-lejyafmi/files/output.log +168 -0
- wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb +2 -2
README.md
CHANGED
@@ -3,20 +3,33 @@ library_name: transformers
 license: apache-2.0
 base_model: openai/whisper-small
 tags:
+- whisper-event
 - generated_from_trainer
+datasets:
+- asierhv/composite_corpus_eu_v2.1
 metrics:
 - wer
 model-index:
-- name:
-  results:
+- name: Whisper Small Basque
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: asierhv/composite_corpus_eu_v2.1
+      type: asierhv/composite_corpus_eu_v2.1
+    metrics:
+    - name: Wer
+      type: wer
+      value: 10.886229784051602
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-#
+# Whisper Small Basque
 
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the asierhv/composite_corpus_eu_v2.1 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1836
 - Wer: 10.8862
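For readers of the updated card, a minimal usage sketch with the Transformers ASR pipeline. This commit page does not show the final Hub repository id, so the id below is a placeholder; substitute the real one.

```python
# Minimal sketch: transcribing Basque audio with the fine-tuned checkpoint.
# NOTE: "your-username/whisper-small-eu" is a hypothetical repo id, not taken
# from this commit; replace it with the model's actual Hub id.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="your-username/whisper-small-eu",  # hypothetical id
    chunk_length_s=30,  # Whisper operates on 30-second windows
)

# Recent transformers versions also accept a Whisper language hint, e.g.
# generate_kwargs={"language": "basque"}, if you need to force Basque decoding.

# Accepts a local path or URL to an audio file; returns {"text": ...}.
print(asr("sample_eu.wav")["text"])
```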
all_results.json
CHANGED
@@ -1,12 +1,13 @@
 {
-    "epoch":
-    "eval_loss": 0.
-    "eval_runtime":
-    "eval_samples_per_second":
-    "eval_steps_per_second": 0.
-    "eval_wer":
-    "
-    "
-    "
-    "
+    "epoch": 1.0,
+    "eval_loss": 0.1835634410381317,
+    "eval_runtime": 151.3822,
+    "eval_samples_per_second": 13.899,
+    "eval_steps_per_second": 0.872,
+    "eval_wer": 10.886229784051602,
+    "total_flos": 7.387786248192e+19,
+    "train_loss": 0.17036041705310345,
+    "train_runtime": 11036.9074,
+    "train_samples_per_second": 23.195,
+    "train_steps_per_second": 0.725
 }

(The removed values are elided in the page rendering; only the key fragments above survive.)
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch":
-    "eval_loss": 0.
-    "eval_runtime":
-    "eval_samples_per_second":
-    "eval_steps_per_second": 0.
-    "eval_wer":
+    "epoch": 1.0,
+    "eval_loss": 0.1835634410381317,
+    "eval_runtime": 151.3822,
+    "eval_samples_per_second": 13.899,
+    "eval_steps_per_second": 0.872,
+    "eval_wer": 10.886229784051602
 }
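For reference, `eval_wer` is the word error rate in percent. The standard definition (general to ASR, not specific to this run) is

$$\mathrm{WER} = \frac{S + D + I}{N} \times 100,$$

where $S$, $D$, and $I$ are the substitutions, deletions, and insertions in the aligned hypothesis and $N$ is the number of reference words; `eval_wer = 10.886` therefore means roughly 10.9 word errors per 100 reference words.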
train_results.json
CHANGED
@@ -1,7 +1,8 @@
 {
-    "epoch":
-    "
-    "
-    "
-    "
+    "epoch": 1.0,
+    "total_flos": 7.387786248192e+19,
+    "train_loss": 0.17036041705310345,
+    "train_runtime": 11036.9074,
+    "train_samples_per_second": 23.195,
+    "train_steps_per_second": 0.725
 }
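As a quick consistency check, the new throughput figures agree with the 8000-step, single-epoch run recorded in trainer_state.json below. A small sketch of the arithmetic (derived here from the reported values, not logged by the Trainer):

```python
# Back-of-the-envelope figures derived from train_results.json above.
train_runtime = 11036.9074        # seconds
samples_per_second = 23.195
steps_per_second = 0.725

total_steps = steps_per_second * train_runtime         # ~8002, matching global_step 8000
samples_seen = samples_per_second * train_runtime      # ~256,000 samples in the epoch
effective_batch = samples_per_second / steps_per_second  # ~32 samples per optimizer step

print(round(total_steps), round(samples_seen), round(effective_batch))
```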
trainer_state.json
CHANGED
@@ -1,1270 +1,2354 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint": "./checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"
|
|
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
-
"epoch": 0.
|
|
|
12 |
"learning_rate": 4.4e-07,
|
13 |
-
"loss":
|
14 |
"step": 25
|
15 |
},
|
16 |
{
|
17 |
-
"epoch": 0.
|
|
|
18 |
"learning_rate": 9.400000000000001e-07,
|
19 |
-
"loss": 1.
|
20 |
"step": 50
|
21 |
},
|
22 |
{
|
23 |
-
"epoch": 0.
|
|
|
24 |
"learning_rate": 1.44e-06,
|
25 |
-
"loss": 1.
|
26 |
"step": 75
|
27 |
},
|
28 |
{
|
29 |
-
"epoch": 0.
|
|
|
30 |
"learning_rate": 1.94e-06,
|
31 |
-
"loss": 0.
|
32 |
"step": 100
|
33 |
},
|
34 |
{
|
35 |
-
"epoch": 0.
|
|
|
36 |
"learning_rate": 2.4400000000000004e-06,
|
37 |
-
"loss": 0.
|
38 |
"step": 125
|
39 |
},
|
40 |
{
|
41 |
-
"epoch": 0.
|
|
|
42 |
"learning_rate": 2.9400000000000002e-06,
|
43 |
-
"loss": 0.
|
44 |
"step": 150
|
45 |
},
|
46 |
{
|
47 |
-
"epoch": 0.
|
|
|
48 |
"learning_rate": 3.44e-06,
|
49 |
-
"loss": 0.
|
50 |
"step": 175
|
51 |
},
|
52 |
{
|
53 |
-
"epoch": 0.
|
|
|
54 |
"learning_rate": 3.94e-06,
|
55 |
-
"loss": 0.
|
56 |
"step": 200
|
57 |
},
|
58 |
{
|
59 |
-
"epoch": 0.
|
|
|
60 |
"learning_rate": 4.440000000000001e-06,
|
61 |
-
"loss": 0.
|
62 |
"step": 225
|
63 |
},
|
64 |
{
|
65 |
-
"epoch": 0.
|
|
|
66 |
"learning_rate": 4.94e-06,
|
67 |
-
"loss": 0.
|
68 |
"step": 250
|
69 |
},
|
70 |
{
|
71 |
-
"epoch": 0.
|
|
|
72 |
"learning_rate": 5.4400000000000004e-06,
|
73 |
-
"loss": 0.
|
74 |
"step": 275
|
75 |
},
|
76 |
{
|
77 |
-
"epoch": 0.
|
|
|
78 |
"learning_rate": 5.94e-06,
|
79 |
-
"loss": 0.
|
80 |
"step": 300
|
81 |
},
|
82 |
{
|
83 |
-
"epoch": 0.
|
|
|
84 |
"learning_rate": 6.440000000000001e-06,
|
85 |
-
"loss": 0.
|
86 |
"step": 325
|
87 |
},
|
88 |
{
|
89 |
-
"epoch": 0.
|
|
|
90 |
"learning_rate": 6.9400000000000005e-06,
|
91 |
-
"loss": 0.
|
92 |
"step": 350
|
93 |
},
|
94 |
{
|
95 |
-
"epoch": 0.
|
|
|
96 |
"learning_rate": 7.440000000000001e-06,
|
97 |
-
"loss": 0.
|
98 |
"step": 375
|
99 |
},
|
100 |
{
|
101 |
-
"epoch": 0.
|
|
|
102 |
"learning_rate": 7.94e-06,
|
103 |
-
"loss": 0.
|
104 |
"step": 400
|
105 |
},
|
106 |
{
|
107 |
-
"epoch": 0.
|
|
|
108 |
"learning_rate": 8.44e-06,
|
109 |
-
"loss": 0.
|
110 |
"step": 425
|
111 |
},
|
112 |
{
|
113 |
-
"epoch": 0.
|
|
|
114 |
"learning_rate": 8.94e-06,
|
115 |
-
"loss": 0.
|
116 |
"step": 450
|
117 |
},
|
118 |
{
|
119 |
-
"epoch": 0.
|
|
|
120 |
"learning_rate": 9.440000000000001e-06,
|
121 |
-
"loss": 0.
|
122 |
"step": 475
|
123 |
},
|
124 |
{
|
125 |
-
"epoch": 0.
|
|
|
126 |
"learning_rate": 9.940000000000001e-06,
|
127 |
-
"loss": 0.
|
128 |
"step": 500
|
129 |
},
|
130 |
{
|
131 |
-
"epoch": 0.
|
132 |
-
"
|
133 |
-
"
|
|
|
134 |
"step": 525
|
135 |
},
|
136 |
{
|
137 |
-
"epoch": 0.
|
138 |
-
"
|
139 |
-
"
|
|
|
140 |
"step": 550
|
141 |
},
|
142 |
{
|
143 |
-
"epoch": 0.
|
144 |
-
"
|
145 |
-
"
|
|
|
146 |
"step": 575
|
147 |
},
|
148 |
{
|
149 |
-
"epoch": 0.
|
150 |
-
"
|
151 |
-
"
|
|
|
152 |
"step": 600
|
153 |
},
|
154 |
{
|
155 |
-
"epoch": 0.
|
156 |
-
"
|
157 |
-
"
|
|
|
158 |
"step": 625
|
159 |
},
|
160 |
{
|
161 |
-
"epoch": 0.
|
162 |
-
"
|
163 |
-
"
|
|
|
164 |
"step": 650
|
165 |
},
|
166 |
{
|
167 |
-
"epoch": 0.
|
168 |
-
"
|
169 |
-
"
|
|
|
170 |
"step": 675
|
171 |
},
|
172 |
{
|
173 |
-
"epoch": 0.
|
174 |
-
"
|
175 |
-
"
|
|
|
176 |
"step": 700
|
177 |
},
|
178 |
{
|
179 |
-
"epoch": 0.
|
180 |
-
"
|
181 |
-
"
|
|
|
182 |
"step": 725
|
183 |
},
|
184 |
{
|
185 |
-
"epoch": 0.
|
186 |
-
"
|
187 |
-
"
|
|
|
188 |
"step": 750
|
189 |
},
|
190 |
{
|
191 |
-
"epoch": 0.
|
192 |
-
"
|
193 |
-
"
|
|
|
194 |
"step": 775
|
195 |
},
|
196 |
{
|
197 |
-
"epoch": 1
|
198 |
-
"
|
199 |
-
"
|
|
|
200 |
"step": 800
|
201 |
},
|
202 |
{
|
203 |
-
"epoch":
|
204 |
-
"
|
205 |
-
"
|
|
|
206 |
"step": 825
|
207 |
},
|
208 |
{
|
209 |
-
"epoch":
|
210 |
-
"
|
211 |
-
"
|
|
|
212 |
"step": 850
|
213 |
},
|
214 |
{
|
215 |
-
"epoch":
|
216 |
-
"
|
217 |
-
"
|
|
|
218 |
"step": 875
|
219 |
},
|
220 |
{
|
221 |
-
"epoch":
|
222 |
-
"
|
223 |
-
"
|
|
|
224 |
"step": 900
|
225 |
},
|
226 |
{
|
227 |
-
"epoch":
|
228 |
-
"
|
229 |
-
"
|
|
|
230 |
"step": 925
|
231 |
},
|
232 |
{
|
233 |
-
"epoch":
|
234 |
-
"
|
235 |
-
"
|
|
|
236 |
"step": 950
|
237 |
},
|
238 |
{
|
239 |
-
"epoch":
|
240 |
-
"
|
241 |
-
"
|
|
|
242 |
"step": 975
|
243 |
},
|
244 |
{
|
245 |
-
"epoch":
|
246 |
-
"
|
247 |
-
"
|
|
|
248 |
"step": 1000
|
249 |
},
|
250 |
{
|
251 |
-
"epoch":
|
252 |
-
"eval_loss": 0.
|
253 |
-
"eval_runtime":
|
254 |
-
"eval_samples_per_second":
|
255 |
-
"eval_steps_per_second": 0.
|
256 |
-
"eval_wer":
|
257 |
"step": 1000
|
258 |
},
|
259 |
{
|
260 |
-
"epoch":
|
261 |
-
"
|
262 |
-
"
|
|
|
263 |
"step": 1025
|
264 |
},
|
265 |
{
|
266 |
-
"epoch":
|
267 |
-
"
|
268 |
-
"
|
|
|
269 |
"step": 1050
|
270 |
},
|
271 |
{
|
272 |
-
"epoch":
|
273 |
-
"
|
274 |
-
"
|
|
|
275 |
"step": 1075
|
276 |
},
|
277 |
{
|
278 |
-
"epoch":
|
279 |
-
"
|
280 |
-
"
|
|
|
281 |
"step": 1100
|
282 |
},
|
283 |
{
|
284 |
-
"epoch":
|
285 |
-
"
|
286 |
-
"
|
|
|
287 |
"step": 1125
|
288 |
},
|
289 |
{
|
290 |
-
"epoch":
|
291 |
-
"
|
292 |
-
"
|
|
|
293 |
"step": 1150
|
294 |
},
|
295 |
{
|
296 |
-
"epoch":
|
297 |
-
"
|
298 |
-
"
|
|
|
299 |
"step": 1175
|
300 |
},
|
301 |
{
|
302 |
-
"epoch":
|
303 |
-
"
|
304 |
-
"
|
|
|
305 |
"step": 1200
|
306 |
},
|
307 |
{
|
308 |
-
"epoch":
|
309 |
-
"
|
310 |
-
"
|
|
|
311 |
"step": 1225
|
312 |
},
|
313 |
{
|
314 |
-
"epoch":
|
315 |
-
"
|
316 |
-
"
|
|
|
317 |
"step": 1250
|
318 |
},
|
319 |
{
|
320 |
-
"epoch":
|
321 |
-
"
|
322 |
-
"
|
|
|
323 |
"step": 1275
|
324 |
},
|
325 |
{
|
326 |
-
"epoch":
|
327 |
-
"
|
328 |
-
"
|
|
|
329 |
"step": 1300
|
330 |
},
|
331 |
{
|
332 |
-
"epoch":
|
333 |
-
"
|
334 |
-
"
|
|
|
335 |
"step": 1325
|
336 |
},
|
337 |
{
|
338 |
-
"epoch":
|
339 |
-
"
|
340 |
-
"
|
|
|
341 |
"step": 1350
|
342 |
},
|
343 |
{
|
344 |
-
"epoch":
|
345 |
-
"
|
346 |
-
"
|
|
|
347 |
"step": 1375
|
348 |
},
|
349 |
{
|
350 |
-
"epoch":
|
351 |
-
"
|
352 |
-
"
|
|
|
353 |
"step": 1400
|
354 |
},
|
355 |
{
|
356 |
-
"epoch":
|
357 |
-
"
|
358 |
-
"
|
|
|
359 |
"step": 1425
|
360 |
},
|
361 |
{
|
362 |
-
"epoch":
|
363 |
-
"
|
364 |
-
"
|
|
|
365 |
"step": 1450
|
366 |
},
|
367 |
{
|
368 |
-
"epoch":
|
369 |
-
"
|
370 |
-
"
|
|
|
371 |
"step": 1475
|
372 |
},
|
373 |
{
|
374 |
-
"epoch":
|
375 |
-
"
|
376 |
-
"
|
|
|
377 |
"step": 1500
|
378 |
},
|
379 |
{
|
380 |
-
"epoch":
|
381 |
-
"
|
382 |
-
"
|
|
|
383 |
"step": 1525
|
384 |
},
|
385 |
{
|
386 |
-
"epoch":
|
387 |
-
"
|
388 |
-
"
|
|
|
389 |
"step": 1550
|
390 |
},
|
391 |
{
|
392 |
-
"epoch":
|
393 |
-
"
|
394 |
-
"
|
|
|
395 |
"step": 1575
|
396 |
},
|
397 |
{
|
398 |
-
"epoch": 2
|
399 |
-
"
|
400 |
-
"
|
|
|
401 |
"step": 1600
|
402 |
},
|
403 |
{
|
404 |
-
"epoch":
|
405 |
-
"
|
406 |
-
"
|
|
|
407 |
"step": 1625
|
408 |
},
|
409 |
{
|
410 |
-
"epoch":
|
411 |
-
"
|
412 |
-
"
|
|
|
413 |
"step": 1650
|
414 |
},
|
415 |
{
|
416 |
-
"epoch":
|
417 |
-
"
|
418 |
-
"
|
|
|
419 |
"step": 1675
|
420 |
},
|
421 |
{
|
422 |
-
"epoch":
|
423 |
-
"
|
424 |
-
"
|
|
|
425 |
"step": 1700
|
426 |
},
|
427 |
{
|
428 |
-
"epoch":
|
429 |
-
"
|
430 |
-
"
|
|
|
431 |
"step": 1725
|
432 |
},
|
433 |
{
|
434 |
-
"epoch":
|
435 |
-
"
|
436 |
-
"
|
|
|
437 |
"step": 1750
|
438 |
},
|
439 |
{
|
440 |
-
"epoch":
|
441 |
-
"
|
442 |
-
"
|
|
|
443 |
"step": 1775
|
444 |
},
|
445 |
{
|
446 |
-
"epoch":
|
447 |
-
"
|
448 |
-
"
|
|
|
449 |
"step": 1800
|
450 |
},
|
451 |
{
|
452 |
-
"epoch":
|
453 |
-
"
|
454 |
-
"
|
|
|
455 |
"step": 1825
|
456 |
},
|
457 |
{
|
458 |
-
"epoch":
|
459 |
-
"
|
460 |
-
"
|
|
|
461 |
"step": 1850
|
462 |
},
|
463 |
{
|
464 |
-
"epoch":
|
465 |
-
"
|
466 |
-
"
|
|
|
467 |
"step": 1875
|
468 |
},
|
469 |
{
|
470 |
-
"epoch":
|
471 |
-
"
|
472 |
-
"
|
|
|
473 |
"step": 1900
|
474 |
},
|
475 |
{
|
476 |
-
"epoch":
|
477 |
-
"
|
478 |
-
"
|
|
|
479 |
"step": 1925
|
480 |
},
|
481 |
{
|
482 |
-
"epoch":
|
483 |
-
"
|
484 |
-
"
|
|
|
485 |
"step": 1950
|
486 |
},
|
487 |
{
|
488 |
-
"epoch":
|
489 |
-
"
|
490 |
-
"
|
|
|
491 |
"step": 1975
|
492 |
},
|
493 |
{
|
494 |
-
"epoch":
|
495 |
-
"
|
496 |
-
"
|
|
|
497 |
"step": 2000
|
498 |
},
|
499 |
{
|
500 |
-
"epoch":
|
501 |
-
"eval_loss": 0.
|
502 |
-
"eval_runtime":
|
503 |
-
"eval_samples_per_second":
|
504 |
-
"eval_steps_per_second": 0.
|
505 |
-
"eval_wer":
|
506 |
"step": 2000
|
507 |
},
|
508 |
{
|
509 |
-
"epoch":
|
510 |
-
"
|
511 |
-
"
|
|
|
512 |
"step": 2025
|
513 |
},
|
514 |
{
|
515 |
-
"epoch":
|
516 |
-
"
|
517 |
-
"
|
|
|
518 |
"step": 2050
|
519 |
},
|
520 |
{
|
521 |
-
"epoch":
|
522 |
-
"
|
523 |
-
"
|
|
|
524 |
"step": 2075
|
525 |
},
|
526 |
{
|
527 |
-
"epoch":
|
528 |
-
"
|
529 |
-
"
|
|
|
530 |
"step": 2100
|
531 |
},
|
532 |
{
|
533 |
-
"epoch":
|
534 |
-
"
|
535 |
-
"
|
|
|
536 |
"step": 2125
|
537 |
},
|
538 |
{
|
539 |
-
"epoch":
|
540 |
-
"
|
541 |
-
"
|
|
|
542 |
"step": 2150
|
543 |
},
|
544 |
{
|
545 |
-
"epoch":
|
546 |
-
"
|
547 |
-
"
|
|
|
548 |
"step": 2175
|
549 |
},
|
550 |
{
|
551 |
-
"epoch":
|
552 |
-
"
|
553 |
-
"
|
|
|
554 |
"step": 2200
|
555 |
},
|
556 |
{
|
557 |
-
"epoch":
|
558 |
-
"
|
559 |
-
"
|
|
|
560 |
"step": 2225
|
561 |
},
|
562 |
{
|
563 |
-
"epoch":
|
564 |
-
"
|
565 |
-
"
|
|
|
566 |
"step": 2250
|
567 |
},
|
568 |
{
|
569 |
-
"epoch":
|
570 |
-
"
|
571 |
-
"
|
|
|
572 |
"step": 2275
|
573 |
},
|
574 |
{
|
575 |
-
"epoch":
|
576 |
-
"
|
577 |
-
"
|
|
|
578 |
"step": 2300
|
579 |
},
|
580 |
{
|
581 |
-
"epoch":
|
582 |
-
"
|
583 |
-
"
|
|
|
584 |
"step": 2325
|
585 |
},
|
586 |
{
|
587 |
-
"epoch":
|
588 |
-
"
|
589 |
-
"
|
|
|
590 |
"step": 2350
|
591 |
},
|
592 |
{
|
593 |
-
"epoch":
|
594 |
-
"
|
595 |
-
"
|
|
|
596 |
"step": 2375
|
597 |
},
|
598 |
{
|
599 |
-
"epoch": 3
|
600 |
-
"
|
601 |
-
"
|
|
|
602 |
"step": 2400
|
603 |
},
|
604 |
{
|
605 |
-
"epoch":
|
606 |
-
"
|
607 |
-
"
|
|
|
608 |
"step": 2425
|
609 |
},
|
610 |
{
|
611 |
-
"epoch":
|
612 |
-
"
|
613 |
-
"
|
|
|
614 |
"step": 2450
|
615 |
},
|
616 |
{
|
617 |
-
"epoch":
|
618 |
-
"
|
619 |
-
"
|
|
|
620 |
"step": 2475
|
621 |
},
|
622 |
{
|
623 |
-
"epoch":
|
624 |
-
"
|
625 |
-
"
|
|
|
626 |
"step": 2500
|
627 |
},
|
628 |
{
|
629 |
-
"epoch":
|
630 |
-
"
|
631 |
-
"
|
|
|
632 |
"step": 2525
|
633 |
},
|
634 |
{
|
635 |
-
"epoch":
|
636 |
-
"
|
637 |
-
"
|
|
|
638 |
"step": 2550
|
639 |
},
|
640 |
{
|
641 |
-
"epoch":
|
642 |
-
"
|
643 |
-
"
|
|
|
644 |
"step": 2575
|
645 |
},
|
646 |
{
|
647 |
-
"epoch":
|
648 |
-
"
|
649 |
-
"
|
|
|
650 |
"step": 2600
|
651 |
},
|
652 |
{
|
653 |
-
"epoch":
|
654 |
-
"
|
655 |
-
"
|
|
|
656 |
"step": 2625
|
657 |
},
|
658 |
{
|
659 |
-
"epoch":
|
660 |
-
"
|
661 |
-
"
|
|
|
662 |
"step": 2650
|
663 |
},
|
664 |
{
|
665 |
-
"epoch":
|
666 |
-
"
|
667 |
-
"
|
|
|
668 |
"step": 2675
|
669 |
},
|
670 |
{
|
671 |
-
"epoch":
|
672 |
-
"
|
673 |
-
"
|
|
|
674 |
"step": 2700
|
675 |
},
|
676 |
{
|
677 |
-
"epoch":
|
678 |
-
"
|
679 |
-
"
|
|
|
680 |
"step": 2725
|
681 |
},
|
682 |
{
|
683 |
-
"epoch":
|
684 |
-
"
|
685 |
-
"
|
|
|
686 |
"step": 2750
|
687 |
},
|
688 |
{
|
689 |
-
"epoch":
|
690 |
-
"
|
691 |
-
"
|
|
|
692 |
"step": 2775
|
693 |
},
|
694 |
{
|
695 |
-
"epoch":
|
696 |
-
"
|
697 |
-
"
|
|
|
698 |
"step": 2800
|
699 |
},
|
700 |
{
|
701 |
-
"epoch":
|
702 |
-
"
|
703 |
-
"
|
|
|
704 |
"step": 2825
|
705 |
},
|
706 |
{
|
707 |
-
"epoch":
|
708 |
-
"
|
709 |
-
"
|
|
|
710 |
"step": 2850
|
711 |
},
|
712 |
{
|
713 |
-
"epoch":
|
714 |
-
"
|
715 |
-
"
|
|
|
716 |
"step": 2875
|
717 |
},
|
718 |
{
|
719 |
-
"epoch":
|
720 |
-
"
|
721 |
-
"
|
|
|
722 |
"step": 2900
|
723 |
},
|
724 |
{
|
725 |
-
"epoch":
|
726 |
-
"
|
727 |
-
"
|
|
|
728 |
"step": 2925
|
729 |
},
|
730 |
{
|
731 |
-
"epoch":
|
732 |
-
"
|
733 |
-
"
|
|
|
734 |
"step": 2950
|
735 |
},
|
736 |
{
|
737 |
-
"epoch":
|
738 |
-
"
|
739 |
-
"
|
|
|
740 |
"step": 2975
|
741 |
},
|
742 |
{
|
743 |
-
"epoch":
|
744 |
-
"
|
745 |
-
"
|
|
|
746 |
"step": 3000
|
747 |
},
|
748 |
{
|
749 |
-
"epoch":
|
750 |
-
"eval_loss": 0.
|
751 |
-
"eval_runtime":
|
752 |
-
"eval_samples_per_second":
|
753 |
-
"eval_steps_per_second": 0.
|
754 |
-
"eval_wer": 13.
|
755 |
"step": 3000
|
756 |
},
|
757 |
{
|
758 |
-
"epoch":
|
759 |
-
"
|
760 |
-
"
|
|
|
761 |
"step": 3025
|
762 |
},
|
763 |
{
|
764 |
-
"epoch":
|
765 |
-
"
|
766 |
-
"
|
|
|
767 |
"step": 3050
|
768 |
},
|
769 |
{
|
770 |
-
"epoch":
|
771 |
-
"
|
772 |
-
"
|
|
|
773 |
"step": 3075
|
774 |
},
|
775 |
{
|
776 |
-
"epoch":
|
777 |
-
"
|
778 |
-
"
|
|
|
779 |
"step": 3100
|
780 |
},
|
781 |
{
|
782 |
-
"epoch":
|
783 |
-
"
|
784 |
-
"
|
|
|
785 |
"step": 3125
|
786 |
},
|
787 |
{
|
788 |
-
"epoch":
|
789 |
-
"
|
790 |
-
"
|
|
|
791 |
"step": 3150
|
792 |
},
|
793 |
{
|
794 |
-
"epoch":
|
795 |
-
"
|
796 |
-
"
|
|
|
797 |
"step": 3175
|
798 |
},
|
799 |
{
|
800 |
-
"epoch": 4
|
801 |
-
"
|
802 |
-
"
|
|
|
803 |
"step": 3200
|
804 |
},
|
805 |
{
|
806 |
-
"epoch":
|
807 |
-
"
|
808 |
-
"
|
|
|
809 |
"step": 3225
|
810 |
},
|
811 |
{
|
812 |
-
"epoch":
|
813 |
-
"
|
814 |
-
"
|
|
|
815 |
"step": 3250
|
816 |
},
|
817 |
{
|
818 |
-
"epoch":
|
819 |
-
"
|
820 |
-
"
|
|
|
821 |
"step": 3275
|
822 |
},
|
823 |
{
|
824 |
-
"epoch":
|
825 |
-
"
|
826 |
-
"
|
|
|
827 |
"step": 3300
|
828 |
},
|
829 |
{
|
830 |
-
"epoch":
|
831 |
-
"
|
832 |
-
"
|
|
|
833 |
"step": 3325
|
834 |
},
|
835 |
{
|
836 |
-
"epoch":
|
837 |
-
"
|
838 |
-
"
|
|
|
839 |
"step": 3350
|
840 |
},
|
841 |
{
|
842 |
-
"epoch":
|
843 |
-
"
|
844 |
-
"
|
|
|
845 |
"step": 3375
|
846 |
},
|
847 |
{
|
848 |
-
"epoch":
|
849 |
-
"
|
850 |
-
"
|
|
|
851 |
"step": 3400
|
852 |
},
|
853 |
{
|
854 |
-
"epoch":
|
855 |
-
"
|
856 |
-
"
|
|
|
857 |
"step": 3425
|
858 |
},
|
859 |
{
|
860 |
-
"epoch":
|
861 |
-
"
|
862 |
-
"
|
|
|
863 |
"step": 3450
|
864 |
},
|
865 |
{
|
866 |
-
"epoch":
|
867 |
-
"
|
868 |
-
"
|
|
|
869 |
"step": 3475
|
870 |
},
|
871 |
{
|
872 |
-
"epoch":
|
873 |
-
"
|
874 |
-
"
|
|
|
875 |
"step": 3500
|
876 |
},
|
877 |
{
|
878 |
-
"epoch":
|
879 |
-
"
|
880 |
-
"
|
|
|
881 |
"step": 3525
|
882 |
},
|
883 |
{
|
884 |
-
"epoch":
|
885 |
-
"
|
886 |
-
"
|
|
|
887 |
"step": 3550
|
888 |
},
|
889 |
{
|
890 |
-
"epoch":
|
891 |
-
"
|
892 |
-
"
|
|
|
893 |
"step": 3575
|
894 |
},
|
895 |
{
|
896 |
-
"epoch":
|
897 |
-
"
|
898 |
-
"
|
|
|
899 |
"step": 3600
|
900 |
},
|
901 |
{
|
902 |
-
"epoch":
|
903 |
-
"
|
904 |
-
"
|
|
|
905 |
"step": 3625
|
906 |
},
|
907 |
{
|
908 |
-
"epoch":
|
909 |
-
"
|
910 |
-
"
|
|
|
911 |
"step": 3650
|
912 |
},
|
913 |
{
|
914 |
-
"epoch":
|
915 |
-
"
|
916 |
-
"
|
|
|
917 |
"step": 3675
|
918 |
},
|
919 |
{
|
920 |
-
"epoch":
|
921 |
-
"
|
922 |
-
"
|
|
|
923 |
"step": 3700
|
924 |
},
|
925 |
{
|
926 |
-
"epoch":
|
927 |
-
"
|
928 |
-
"
|
|
|
929 |
"step": 3725
|
930 |
},
|
931 |
{
|
932 |
-
"epoch":
|
933 |
-
"
|
934 |
-
"
|
|
|
935 |
"step": 3750
|
936 |
},
|
937 |
{
|
938 |
-
"epoch":
|
939 |
-
"
|
940 |
-
"
|
|
|
941 |
"step": 3775
|
942 |
},
|
943 |
{
|
944 |
-
"epoch":
|
945 |
-
"
|
946 |
-
"
|
|
|
947 |
"step": 3800
|
948 |
},
|
949 |
{
|
950 |
-
"epoch":
|
951 |
-
"
|
952 |
-
"
|
|
|
953 |
"step": 3825
|
954 |
},
|
955 |
{
|
956 |
-
"epoch":
|
957 |
-
"
|
958 |
-
"
|
|
|
959 |
"step": 3850
|
960 |
},
|
961 |
{
|
962 |
-
"epoch":
|
963 |
-
"
|
964 |
-
"
|
|
|
965 |
"step": 3875
|
966 |
},
|
967 |
{
|
968 |
-
"epoch":
|
969 |
-
"
|
970 |
-
"
|
|
|
971 |
"step": 3900
|
972 |
},
|
973 |
{
|
974 |
-
"epoch":
|
975 |
-
"
|
976 |
-
"
|
|
|
977 |
"step": 3925
|
978 |
},
|
979 |
{
|
980 |
-
"epoch":
|
981 |
-
"
|
982 |
-
"
|
|
|
983 |
"step": 3950
|
984 |
},
|
985 |
{
|
986 |
-
"epoch":
|
987 |
-
"
|
988 |
-
"
|
|
|
989 |
"step": 3975
|
990 |
},
|
991 |
{
|
992 |
-
"epoch": 5
|
993 |
-
"
|
994 |
-
"
|
|
|
995 |
"step": 4000
|
996 |
},
|
997 |
{
|
998 |
-
"epoch": 5
|
999 |
-
"eval_loss": 0.
|
1000 |
-
"eval_runtime":
|
1001 |
-
"eval_samples_per_second":
|
1002 |
-
"eval_steps_per_second": 0.
|
1003 |
-
"eval_wer": 12.
|
1004 |
"step": 4000
|
1005 |
},
|
1006 |
{
|
1007 |
-
"epoch":
|
1008 |
-
"
|
1009 |
-
"
|
|
|
1010 |
"step": 4025
|
1011 |
},
|
1012 |
{
|
1013 |
-
"epoch":
|
1014 |
-
"
|
1015 |
-
"
|
|
|
1016 |
"step": 4050
|
1017 |
},
|
1018 |
{
|
1019 |
-
"epoch":
|
1020 |
-
"
|
1021 |
-
"
|
|
|
1022 |
"step": 4075
|
1023 |
},
|
1024 |
{
|
1025 |
-
"epoch":
|
1026 |
-
"
|
1027 |
-
"
|
|
|
1028 |
"step": 4100
|
1029 |
},
|
1030 |
{
|
1031 |
-
"epoch":
|
1032 |
-
"
|
1033 |
-
"
|
|
|
1034 |
"step": 4125
|
1035 |
},
|
1036 |
{
|
1037 |
-
"epoch":
|
1038 |
-
"
|
1039 |
-
"
|
|
|
1040 |
"step": 4150
|
1041 |
},
|
1042 |
{
|
1043 |
-
"epoch":
|
1044 |
-
"
|
1045 |
-
"
|
|
|
1046 |
"step": 4175
|
1047 |
},
|
1048 |
{
|
1049 |
-
"epoch":
|
1050 |
-
"
|
1051 |
-
"
|
|
|
1052 |
"step": 4200
|
1053 |
},
|
1054 |
{
|
1055 |
-
"epoch":
|
1056 |
-
"
|
1057 |
-
"
|
|
|
1058 |
"step": 4225
|
1059 |
},
|
1060 |
{
|
1061 |
-
"epoch":
|
1062 |
-
"
|
1063 |
-
"
|
|
|
1064 |
"step": 4250
|
1065 |
},
|
1066 |
{
|
1067 |
-
"epoch":
|
1068 |
-
"
|
1069 |
-
"
|
|
|
1070 |
"step": 4275
|
1071 |
},
|
1072 |
{
|
1073 |
-
"epoch":
|
1074 |
-
"
|
1075 |
-
"
|
|
|
1076 |
"step": 4300
|
1077 |
},
|
1078 |
{
|
1079 |
-
"epoch":
|
1080 |
-
"
|
1081 |
-
"
|
|
|
1082 |
"step": 4325
|
1083 |
},
|
1084 |
{
|
1085 |
-
"epoch":
|
1086 |
-
"
|
1087 |
-
"
|
|
|
1088 |
"step": 4350
|
1089 |
},
|
1090 |
{
|
1091 |
-
"epoch":
|
1092 |
-
"
|
1093 |
-
"
|
|
|
1094 |
"step": 4375
|
1095 |
},
|
1096 |
{
|
1097 |
-
"epoch":
|
1098 |
-
"
|
1099 |
-
"
|
|
|
1100 |
"step": 4400
|
1101 |
},
|
1102 |
{
|
1103 |
-
"epoch":
|
1104 |
-
"
|
1105 |
-
"
|
|
|
1106 |
"step": 4425
|
1107 |
},
|
1108 |
{
|
1109 |
-
"epoch":
|
1110 |
-
"
|
1111 |
-
"
|
|
|
1112 |
"step": 4450
|
1113 |
},
|
1114 |
{
|
1115 |
-
"epoch":
|
1116 |
-
"
|
1117 |
-
"
|
|
|
1118 |
"step": 4475
|
1119 |
},
|
1120 |
{
|
1121 |
-
"epoch":
|
1122 |
-
"
|
1123 |
-
"
|
|
|
1124 |
"step": 4500
|
1125 |
},
|
1126 |
{
|
1127 |
-
"epoch":
|
1128 |
-
"
|
1129 |
-
"
|
|
|
1130 |
"step": 4525
|
1131 |
},
|
1132 |
{
|
1133 |
-
"epoch":
|
1134 |
-
"
|
1135 |
-
"
|
|
|
1136 |
"step": 4550
|
1137 |
},
|
1138 |
{
|
1139 |
-
"epoch":
|
1140 |
-
"
|
1141 |
-
"
|
|
|
1142 |
"step": 4575
|
1143 |
},
|
1144 |
{
|
1145 |
-
"epoch":
|
1146 |
-
"
|
1147 |
-
"
|
|
|
1148 |
"step": 4600
|
1149 |
},
|
1150 |
{
|
1151 |
-
"epoch":
|
1152 |
-
"
|
1153 |
-
"
|
|
|
1154 |
"step": 4625
|
1155 |
},
|
1156 |
{
|
1157 |
-
"epoch":
|
1158 |
-
"
|
1159 |
-
"
|
|
|
1160 |
"step": 4650
|
1161 |
},
|
1162 |
{
|
1163 |
-
"epoch":
|
1164 |
-
"
|
1165 |
-
"
|
|
|
1166 |
"step": 4675
|
1167 |
},
|
1168 |
{
|
1169 |
-
"epoch":
|
1170 |
-
"
|
1171 |
-
"
|
|
|
1172 |
"step": 4700
|
1173 |
},
|
1174 |
{
|
1175 |
-
"epoch":
|
1176 |
-
"
|
1177 |
-
"
|
|
|
1178 |
"step": 4725
|
1179 |
},
|
1180 |
{
|
1181 |
-
"epoch":
|
1182 |
-
"
|
1183 |
-
"
|
|
|
1184 |
"step": 4750
|
1185 |
},
|
1186 |
{
|
1187 |
-
"epoch":
|
1188 |
-
"
|
1189 |
-
"
|
|
|
1190 |
"step": 4775
|
1191 |
},
|
1192 |
{
|
1193 |
-
"epoch": 6
|
1194 |
-
"
|
1195 |
-
"
|
|
|
1196 |
"step": 4800
|
1197 |
},
|
1198 |
{
|
1199 |
-
"epoch":
|
1200 |
-
"
|
1201 |
-
"
|
|
|
1202 |
"step": 4825
|
1203 |
},
|
1204 |
{
|
1205 |
-
"epoch":
|
1206 |
-
"
|
1207 |
-
"
|
|
|
1208 |
"step": 4850
|
1209 |
},
|
1210 |
{
|
1211 |
-
"epoch":
|
1212 |
-
"
|
1213 |
-
"
|
|
|
1214 |
"step": 4875
|
1215 |
},
|
1216 |
{
|
1217 |
-
"epoch":
|
1218 |
-
"
|
1219 |
-
"
|
|
|
1220 |
"step": 4900
|
1221 |
},
|
1222 |
{
|
1223 |
-
"epoch":
|
1224 |
-
"
|
1225 |
-
"
|
|
|
1226 |
"step": 4925
|
1227 |
},
|
1228 |
{
|
1229 |
-
"epoch":
|
1230 |
-
"
|
1231 |
-
"
|
|
|
1232 |
"step": 4950
|
1233 |
},
|
1234 |
{
|
1235 |
-
"epoch":
|
1236 |
-
"
|
1237 |
-
"
|
|
|
1238 |
"step": 4975
|
1239 |
},
|
1240 |
{
|
1241 |
-
"epoch":
|
1242 |
-
"
|
1243 |
-
"
|
|
|
1244 |
"step": 5000
|
1245 |
},
|
1246 |
{
|
1247 |
-
"epoch":
|
1248 |
-
"eval_loss": 0.
|
1249 |
-
"eval_runtime":
|
1250 |
-
"eval_samples_per_second":
|
1251 |
-
"eval_steps_per_second": 0.
|
1252 |
-
"eval_wer":
|
1253 |
"step": 5000
|
1254 |
},
|
1255 |
{
|
1256 |
-
"epoch":
|
1257 |
-
"
|
1258 |
-
"
|
1259 |
-
"
|
1260 |
-
"
|
1261 |
-
|
1262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1263 |
}
|
1264 |
],
|
1265 |
-
"
|
|
|
|
|
1266 |
"num_train_epochs": 9223372036854775807,
|
1267 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1268 |
"trial_name": null,
|
1269 |
"trial_params": null
|
1270 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 10.886229784051602,
|
3 |
+
"best_model_checkpoint": "./checkpoint-8000",
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 1000,
|
6 |
+
"global_step": 8000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.003125,
|
13 |
+
"grad_norm": 17.579944610595703,
|
14 |
"learning_rate": 4.4e-07,
|
15 |
+
"loss": 2.3284,
|
16 |
"step": 25
|
17 |
},
|
18 |
{
|
19 |
+
"epoch": 0.00625,
|
20 |
+
"grad_norm": 9.753120422363281,
|
21 |
"learning_rate": 9.400000000000001e-07,
|
22 |
+
"loss": 1.9145,
|
23 |
"step": 50
|
24 |
},
|
25 |
{
|
26 |
+
"epoch": 0.009375,
|
27 |
+
"grad_norm": 9.469987869262695,
|
28 |
"learning_rate": 1.44e-06,
|
29 |
+
"loss": 1.2892,
|
30 |
"step": 75
|
31 |
},
|
32 |
{
|
33 |
+
"epoch": 0.0125,
|
34 |
+
"grad_norm": 6.952774524688721,
|
35 |
"learning_rate": 1.94e-06,
|
36 |
+
"loss": 0.9797,
|
37 |
"step": 100
|
38 |
},
|
39 |
{
|
40 |
+
"epoch": 0.015625,
|
41 |
+
"grad_norm": 6.080902576446533,
|
42 |
"learning_rate": 2.4400000000000004e-06,
|
43 |
+
"loss": 0.8265,
|
44 |
"step": 125
|
45 |
},
|
46 |
{
|
47 |
+
"epoch": 0.01875,
|
48 |
+
"grad_norm": 5.6766037940979,
|
49 |
"learning_rate": 2.9400000000000002e-06,
|
50 |
+
"loss": 0.6998,
|
51 |
"step": 150
|
52 |
},
|
53 |
{
|
54 |
+
"epoch": 0.021875,
|
55 |
+
"grad_norm": 5.372249126434326,
|
56 |
"learning_rate": 3.44e-06,
|
57 |
+
"loss": 0.6537,
|
58 |
"step": 175
|
59 |
},
|
60 |
{
|
61 |
+
"epoch": 0.025,
|
62 |
+
"grad_norm": 5.710323810577393,
|
63 |
"learning_rate": 3.94e-06,
|
64 |
+
"loss": 0.6149,
|
65 |
"step": 200
|
66 |
},
|
67 |
{
|
68 |
+
"epoch": 0.028125,
|
69 |
+
"grad_norm": 5.235953330993652,
|
70 |
"learning_rate": 4.440000000000001e-06,
|
71 |
+
"loss": 0.5256,
|
72 |
"step": 225
|
73 |
},
|
74 |
{
|
75 |
+
"epoch": 0.03125,
|
76 |
+
"grad_norm": 6.58635950088501,
|
77 |
"learning_rate": 4.94e-06,
|
78 |
+
"loss": 0.54,
|
79 |
"step": 250
|
80 |
},
|
81 |
{
|
82 |
+
"epoch": 0.034375,
|
83 |
+
"grad_norm": 5.4912004470825195,
|
84 |
"learning_rate": 5.4400000000000004e-06,
|
85 |
+
"loss": 0.5521,
|
86 |
"step": 275
|
87 |
},
|
88 |
{
|
89 |
+
"epoch": 0.0375,
|
90 |
+
"grad_norm": 5.846869945526123,
|
91 |
"learning_rate": 5.94e-06,
|
92 |
+
"loss": 0.5379,
|
93 |
"step": 300
|
94 |
},
|
95 |
{
|
96 |
+
"epoch": 0.040625,
|
97 |
+
"grad_norm": 5.060309410095215,
|
98 |
"learning_rate": 6.440000000000001e-06,
|
99 |
+
"loss": 0.4778,
|
100 |
"step": 325
|
101 |
},
|
102 |
{
|
103 |
+
"epoch": 0.04375,
|
104 |
+
"grad_norm": 5.06487512588501,
|
105 |
"learning_rate": 6.9400000000000005e-06,
|
106 |
+
"loss": 0.4152,
|
107 |
"step": 350
|
108 |
},
|
109 |
{
|
110 |
+
"epoch": 0.046875,
|
111 |
+
"grad_norm": 4.936045169830322,
|
112 |
"learning_rate": 7.440000000000001e-06,
|
113 |
+
"loss": 0.3547,
|
114 |
"step": 375
|
115 |
},
|
116 |
{
|
117 |
+
"epoch": 0.05,
|
118 |
+
"grad_norm": 3.8072471618652344,
|
119 |
"learning_rate": 7.94e-06,
|
120 |
+
"loss": 0.3428,
|
121 |
"step": 400
|
122 |
},
|
123 |
{
|
124 |
+
"epoch": 0.053125,
|
125 |
+
"grad_norm": 3.9378795623779297,
|
126 |
"learning_rate": 8.44e-06,
|
127 |
+
"loss": 0.3099,
|
128 |
"step": 425
|
129 |
},
|
130 |
{
|
131 |
+
"epoch": 0.05625,
|
132 |
+
"grad_norm": 3.732869863510132,
|
133 |
"learning_rate": 8.94e-06,
|
134 |
+
"loss": 0.2963,
|
135 |
"step": 450
|
136 |
},
|
137 |
{
|
138 |
+
"epoch": 0.059375,
|
139 |
+
"grad_norm": 3.9596025943756104,
|
140 |
"learning_rate": 9.440000000000001e-06,
|
141 |
+
"loss": 0.2745,
|
142 |
"step": 475
|
143 |
},
|
144 |
{
|
145 |
+
"epoch": 0.0625,
|
146 |
+
"grad_norm": 3.428398370742798,
|
147 |
"learning_rate": 9.940000000000001e-06,
|
148 |
+
"loss": 0.2626,
|
149 |
"step": 500
|
150 |
},
|
151 |
{
|
152 |
+
"epoch": 0.065625,
|
153 |
+
"grad_norm": 5.03747034072876,
|
154 |
+
"learning_rate": 9.970666666666668e-06,
|
155 |
+
"loss": 0.2411,
|
156 |
"step": 525
|
157 |
},
|
158 |
{
|
159 |
+
"epoch": 0.06875,
|
160 |
+
"grad_norm": 3.2012217044830322,
|
161 |
+
"learning_rate": 9.937333333333334e-06,
|
162 |
+
"loss": 0.2389,
|
163 |
"step": 550
|
164 |
},
|
165 |
{
|
166 |
+
"epoch": 0.071875,
|
167 |
+
"grad_norm": 3.7361278533935547,
|
168 |
+
"learning_rate": 9.904e-06,
|
169 |
+
"loss": 0.2217,
|
170 |
"step": 575
|
171 |
},
|
172 |
{
|
173 |
+
"epoch": 0.075,
|
174 |
+
"grad_norm": 4.509885787963867,
|
175 |
+
"learning_rate": 9.870666666666667e-06,
|
176 |
+
"loss": 0.2246,
|
177 |
"step": 600
|
178 |
},
|
179 |
{
|
180 |
+
"epoch": 0.078125,
|
181 |
+
"grad_norm": 3.462961435317993,
|
182 |
+
"learning_rate": 9.837333333333335e-06,
|
183 |
+
"loss": 0.199,
|
184 |
"step": 625
|
185 |
},
|
186 |
{
|
187 |
+
"epoch": 0.08125,
|
188 |
+
"grad_norm": 2.764691114425659,
|
189 |
+
"learning_rate": 9.804000000000001e-06,
|
190 |
+
"loss": 0.2156,
|
191 |
"step": 650
|
192 |
},
|
193 |
{
|
194 |
+
"epoch": 0.084375,
|
195 |
+
"grad_norm": 3.059408187866211,
|
196 |
+
"learning_rate": 9.770666666666668e-06,
|
197 |
+
"loss": 0.212,
|
198 |
"step": 675
|
199 |
},
|
200 |
{
|
201 |
+
"epoch": 0.0875,
|
202 |
+
"grad_norm": 3.952425718307495,
|
203 |
+
"learning_rate": 9.737333333333334e-06,
|
204 |
+
"loss": 0.2123,
|
205 |
"step": 700
|
206 |
},
|
207 |
{
|
208 |
+
"epoch": 0.090625,
|
209 |
+
"grad_norm": 4.892609119415283,
|
210 |
+
"learning_rate": 9.704e-06,
|
211 |
+
"loss": 0.2343,
|
212 |
"step": 725
|
213 |
},
|
214 |
{
|
215 |
+
"epoch": 0.09375,
|
216 |
+
"grad_norm": 4.592615127563477,
|
217 |
+
"learning_rate": 9.670666666666667e-06,
|
218 |
+
"loss": 0.3308,
|
219 |
"step": 750
|
220 |
},
|
221 |
{
|
222 |
+
"epoch": 0.096875,
|
223 |
+
"grad_norm": 4.663967132568359,
|
224 |
+
"learning_rate": 9.637333333333333e-06,
|
225 |
+
"loss": 0.3146,
|
226 |
"step": 775
|
227 |
},
|
228 |
{
|
229 |
+
"epoch": 0.1,
|
230 |
+
"grad_norm": 5.091048717498779,
|
231 |
+
"learning_rate": 9.604000000000002e-06,
|
232 |
+
"loss": 0.3519,
|
233 |
"step": 800
|
234 |
},
|
235 |
{
|
236 |
+
"epoch": 0.103125,
|
237 |
+
"grad_norm": 3.8216071128845215,
|
238 |
+
"learning_rate": 9.570666666666666e-06,
|
239 |
+
"loss": 0.2365,
|
240 |
"step": 825
|
241 |
},
|
242 |
{
|
243 |
+
"epoch": 0.10625,
|
244 |
+
"grad_norm": 3.122516393661499,
|
245 |
+
"learning_rate": 9.537333333333334e-06,
|
246 |
+
"loss": 0.193,
|
247 |
"step": 850
|
248 |
},
|
249 |
{
|
250 |
+
"epoch": 0.109375,
|
251 |
+
"grad_norm": 2.657339096069336,
|
252 |
+
"learning_rate": 9.504e-06,
|
253 |
+
"loss": 0.1759,
|
254 |
"step": 875
|
255 |
},
|
256 |
{
|
257 |
+
"epoch": 0.1125,
|
258 |
+
"grad_norm": 4.554510116577148,
|
259 |
+
"learning_rate": 9.470666666666667e-06,
|
260 |
+
"loss": 0.2387,
|
261 |
"step": 900
|
262 |
},
|
263 |
{
|
264 |
+
"epoch": 0.115625,
|
265 |
+
"grad_norm": 5.045220851898193,
|
266 |
+
"learning_rate": 9.437333333333334e-06,
|
267 |
+
"loss": 0.2845,
|
268 |
"step": 925
|
269 |
},
|
270 |
{
|
271 |
+
"epoch": 0.11875,
|
272 |
+
"grad_norm": 4.260054588317871,
|
273 |
+
"learning_rate": 9.404e-06,
|
274 |
+
"loss": 0.2755,
|
275 |
"step": 950
|
276 |
},
|
277 |
{
|
278 |
+
"epoch": 0.121875,
|
279 |
+
"grad_norm": 5.8209147453308105,
|
280 |
+
"learning_rate": 9.370666666666668e-06,
|
281 |
+
"loss": 0.481,
|
282 |
"step": 975
|
283 |
},
|
284 |
{
|
285 |
+
"epoch": 0.125,
|
286 |
+
"grad_norm": 5.498444557189941,
|
287 |
+
"learning_rate": 9.337333333333335e-06,
|
288 |
+
"loss": 0.3998,
|
289 |
"step": 1000
|
290 |
},
|
291 |
{
|
292 |
+
"epoch": 0.125,
|
293 |
+
"eval_loss": 0.36512792110443115,
|
294 |
+
"eval_runtime": 153.2646,
|
295 |
+
"eval_samples_per_second": 13.728,
|
296 |
+
"eval_steps_per_second": 0.861,
|
297 |
+
"eval_wer": 21.50135552023932,
|
298 |
"step": 1000
|
299 |
},
|
300 |
{
|
301 |
+
"epoch": 0.128125,
|
302 |
+
"grad_norm": 4.732964515686035,
|
303 |
+
"learning_rate": 9.304000000000001e-06,
|
304 |
+
"loss": 0.329,
|
305 |
"step": 1025
|
306 |
},
|
307 |
{
|
308 |
+
"epoch": 0.13125,
|
309 |
+
"grad_norm": 3.3556125164031982,
|
310 |
+
"learning_rate": 9.270666666666667e-06,
|
311 |
+
"loss": 0.2319,
|
312 |
"step": 1050
|
313 |
},
|
314 |
{
|
315 |
+
"epoch": 0.134375,
|
316 |
+
"grad_norm": 2.9708847999572754,
|
317 |
+
"learning_rate": 9.237333333333334e-06,
|
318 |
+
"loss": 0.174,
|
319 |
"step": 1075
|
320 |
},
|
321 |
{
|
322 |
+
"epoch": 0.1375,
|
323 |
+
"grad_norm": 2.841306447982788,
|
324 |
+
"learning_rate": 9.204e-06,
|
325 |
+
"loss": 0.1447,
|
326 |
"step": 1100
|
327 |
},
|
328 |
{
|
329 |
+
"epoch": 0.140625,
|
330 |
+
"grad_norm": 2.7909176349639893,
|
331 |
+
"learning_rate": 9.170666666666668e-06,
|
332 |
+
"loss": 0.1406,
|
333 |
"step": 1125
|
334 |
},
|
335 |
{
|
336 |
+
"epoch": 0.14375,
|
337 |
+
"grad_norm": 3.37842059135437,
|
338 |
+
"learning_rate": 9.137333333333333e-06,
|
339 |
+
"loss": 0.151,
|
340 |
"step": 1150
|
341 |
},
|
342 |
{
|
343 |
+
"epoch": 0.146875,
|
344 |
+
"grad_norm": 3.023977041244507,
|
345 |
+
"learning_rate": 9.104000000000001e-06,
|
346 |
+
"loss": 0.1529,
|
347 |
"step": 1175
|
348 |
},
|
349 |
{
|
350 |
+
"epoch": 0.15,
|
351 |
+
"grad_norm": 3.015974283218384,
|
352 |
+
"learning_rate": 9.070666666666668e-06,
|
353 |
+
"loss": 0.1496,
|
354 |
"step": 1200
|
355 |
},
|
356 |
{
|
357 |
+
"epoch": 0.153125,
|
358 |
+
"grad_norm": 4.30889892578125,
|
359 |
+
"learning_rate": 9.037333333333334e-06,
|
360 |
+
"loss": 0.219,
|
361 |
"step": 1225
|
362 |
},
|
363 |
{
|
364 |
+
"epoch": 0.15625,
|
365 |
+
"grad_norm": 4.160729885101318,
|
366 |
+
"learning_rate": 9.004e-06,
|
367 |
+
"loss": 0.238,
|
368 |
"step": 1250
|
369 |
},
|
370 |
{
|
371 |
+
"epoch": 0.159375,
|
372 |
+
"grad_norm": 4.687659740447998,
|
373 |
+
"learning_rate": 8.970666666666667e-06,
|
374 |
+
"loss": 0.2603,
|
375 |
"step": 1275
|
376 |
},
|
377 |
{
|
378 |
+
"epoch": 0.1625,
|
379 |
+
"grad_norm": 4.577232837677002,
|
380 |
+
"learning_rate": 8.937333333333335e-06,
|
381 |
+
"loss": 0.2666,
|
382 |
"step": 1300
|
383 |
},
|
384 |
{
|
385 |
+
"epoch": 0.165625,
|
386 |
+
"grad_norm": 5.091732501983643,
|
387 |
+
"learning_rate": 8.904e-06,
|
388 |
+
"loss": 0.2337,
|
389 |
"step": 1325
|
390 |
},
|
391 |
{
|
392 |
+
"epoch": 0.16875,
|
393 |
+
"grad_norm": 4.125801086425781,
|
394 |
+
"learning_rate": 8.870666666666668e-06,
|
395 |
+
"loss": 0.2379,
|
396 |
"step": 1350
|
397 |
},
|
398 |
{
|
399 |
+
"epoch": 0.171875,
|
400 |
+
"grad_norm": 5.142183303833008,
|
401 |
+
"learning_rate": 8.837333333333334e-06,
|
402 |
+
"loss": 0.2215,
|
403 |
"step": 1375
|
404 |
},
|
405 |
{
|
406 |
+
"epoch": 0.175,
|
407 |
+
"grad_norm": 4.486277103424072,
|
408 |
+
"learning_rate": 8.804e-06,
|
409 |
+
"loss": 0.2136,
|
410 |
"step": 1400
|
411 |
},
|
412 |
{
|
413 |
+
"epoch": 0.178125,
|
414 |
+
"grad_norm": 3.5466482639312744,
|
415 |
+
"learning_rate": 8.770666666666667e-06,
|
416 |
+
"loss": 0.2214,
|
417 |
"step": 1425
|
418 |
},
|
419 |
{
|
420 |
+
"epoch": 0.18125,
|
421 |
+
"grad_norm": 3.6199097633361816,
|
422 |
+
"learning_rate": 8.737333333333334e-06,
|
423 |
+
"loss": 0.2113,
|
424 |
"step": 1450
|
425 |
},
|
426 |
{
|
427 |
+
"epoch": 0.184375,
|
428 |
+
"grad_norm": 2.559951066970825,
|
429 |
+
"learning_rate": 8.704e-06,
|
430 |
+
"loss": 0.1552,
|
431 |
"step": 1475
|
432 |
},
|
433 |
{
|
434 |
+
"epoch": 0.1875,
|
435 |
+
"grad_norm": 2.9152133464813232,
|
436 |
+
"learning_rate": 8.670666666666666e-06,
|
437 |
+
"loss": 0.1354,
|
438 |
"step": 1500
|
439 |
},
|
440 |
{
|
441 |
+
"epoch": 0.190625,
|
442 |
+
"grad_norm": 2.608732223510742,
|
443 |
+
"learning_rate": 8.637333333333335e-06,
|
444 |
+
"loss": 0.144,
|
445 |
"step": 1525
|
446 |
},
|
447 |
{
|
448 |
+
"epoch": 0.19375,
|
449 |
+
"grad_norm": 4.0043416023254395,
|
450 |
+
"learning_rate": 8.604000000000001e-06,
|
451 |
+
"loss": 0.1367,
|
452 |
"step": 1550
|
453 |
},
|
454 |
{
|
455 |
+
"epoch": 0.196875,
|
456 |
+
"grad_norm": 2.3621206283569336,
|
457 |
+
"learning_rate": 8.570666666666667e-06,
|
458 |
+
"loss": 0.1194,
|
459 |
"step": 1575
|
460 |
},
|
461 |
{
|
462 |
+
"epoch": 0.2,
|
463 |
+
"grad_norm": 2.6970181465148926,
|
464 |
+
"learning_rate": 8.537333333333334e-06,
|
465 |
+
"loss": 0.1283,
|
466 |
"step": 1600
|
467 |
},
|
468 |
{
|
469 |
+
"epoch": 0.203125,
|
470 |
+
"grad_norm": 4.737370014190674,
|
471 |
+
"learning_rate": 8.504000000000002e-06,
|
472 |
+
"loss": 0.1858,
|
473 |
"step": 1625
|
474 |
},
|
475 |
{
|
476 |
+
"epoch": 0.20625,
|
477 |
+
"grad_norm": 3.462738513946533,
|
478 |
+
"learning_rate": 8.470666666666667e-06,
|
479 |
+
"loss": 0.1995,
|
480 |
"step": 1650
|
481 |
},
|
482 |
{
|
483 |
+
"epoch": 0.209375,
|
484 |
+
"grad_norm": 4.608364582061768,
|
485 |
+
"learning_rate": 8.437333333333335e-06,
|
486 |
+
"loss": 0.2028,
|
487 |
"step": 1675
|
488 |
},
|
489 |
{
|
490 |
+
"epoch": 0.2125,
|
491 |
+
"grad_norm": 2.770601987838745,
|
492 |
+
"learning_rate": 8.404000000000001e-06,
|
493 |
+
"loss": 0.1952,
|
494 |
"step": 1700
|
495 |
},
|
496 |
{
|
497 |
+
"epoch": 0.215625,
|
498 |
+
"grad_norm": 3.041656017303467,
|
499 |
+
"learning_rate": 8.370666666666668e-06,
|
500 |
+
"loss": 0.1464,
|
501 |
"step": 1725
|
502 |
},
|
503 |
{
|
504 |
+
"epoch": 0.21875,
|
505 |
+
"grad_norm": 2.988032102584839,
|
506 |
+
"learning_rate": 8.337333333333334e-06,
|
507 |
+
"loss": 0.1424,
|
508 |
"step": 1750
|
509 |
},
|
510 |
{
|
511 |
+
"epoch": 0.221875,
|
512 |
+
"grad_norm": 3.0646026134490967,
|
513 |
+
"learning_rate": 8.304e-06,
|
514 |
+
"loss": 0.1233,
|
515 |
"step": 1775
|
516 |
},
|
517 |
{
|
518 |
+
"epoch": 0.225,
|
519 |
+
"grad_norm": 2.617403268814087,
|
520 |
+
"learning_rate": 8.270666666666667e-06,
|
521 |
+
"loss": 0.1384,
|
522 |
"step": 1800
|
523 |
},
|
524 |
{
|
525 |
+
"epoch": 0.228125,
|
526 |
+
"grad_norm": 2.6170425415039062,
|
527 |
+
"learning_rate": 8.237333333333333e-06,
|
528 |
+
"loss": 0.1208,
|
529 |
"step": 1825
|
530 |
},
|
531 |
{
|
532 |
+
"epoch": 0.23125,
|
533 |
+
"grad_norm": 2.1296098232269287,
|
534 |
+
"learning_rate": 8.204000000000001e-06,
|
535 |
+
"loss": 0.1176,
|
536 |
"step": 1850
|
537 |
},
|
538 |
{
|
539 |
+
"epoch": 0.234375,
|
540 |
+
"grad_norm": 2.767275810241699,
|
541 |
+
"learning_rate": 8.170666666666668e-06,
|
542 |
+
"loss": 0.1189,
|
543 |
"step": 1875
|
544 |
},
|
545 |
{
|
546 |
+
"epoch": 0.2375,
|
547 |
+
"grad_norm": 2.7053661346435547,
|
548 |
+
"learning_rate": 8.137333333333334e-06,
|
549 |
+
"loss": 0.1211,
|
550 |
"step": 1900
|
551 |
},
|
552 |
{
|
553 |
+
"epoch": 0.240625,
|
554 |
+
"grad_norm": 2.281399965286255,
|
555 |
+
"learning_rate": 8.104e-06,
|
556 |
+
"loss": 0.1156,
|
557 |
"step": 1925
|
558 |
},
|
559 |
{
|
560 |
+
"epoch": 0.24375,
|
561 |
+
"grad_norm": 3.7013635635375977,
|
562 |
+
"learning_rate": 8.070666666666667e-06,
|
563 |
+
"loss": 0.1517,
|
564 |
"step": 1950
|
565 |
},
|
566 |
{
|
567 |
+
"epoch": 0.246875,
|
568 |
+
"grad_norm": 3.7125532627105713,
|
569 |
+
"learning_rate": 8.037333333333334e-06,
|
570 |
+
"loss": 0.2002,
|
571 |
"step": 1975
|
572 |
},
|
573 |
{
|
574 |
+
"epoch": 0.25,
|
575 |
+
"grad_norm": 3.8716859817504883,
|
576 |
+
"learning_rate": 8.004e-06,
|
577 |
+
"loss": 0.1975,
|
578 |
"step": 2000
|
579 |
},
|
580 |
{
|
581 |
+
"epoch": 0.25,
|
582 |
+
"eval_loss": 0.2918355464935303,
|
583 |
+
"eval_runtime": 153.1763,
|
584 |
+
"eval_samples_per_second": 13.736,
|
585 |
+
"eval_steps_per_second": 0.862,
|
586 |
+
"eval_wer": 15.873609423202767,
|
587 |
"step": 2000
|
588 |
},
|
589 |
{
|
590 |
+
"epoch": 0.253125,
|
591 |
+
"grad_norm": 2.4911813735961914,
|
592 |
+
"learning_rate": 7.970666666666668e-06,
|
593 |
+
"loss": 0.1648,
|
594 |
"step": 2025
|
595 |
},
|
596 |
{
|
597 |
+
"epoch": 0.25625,
|
598 |
+
"grad_norm": 2.604146718978882,
|
599 |
+
"learning_rate": 7.937333333333333e-06,
|
600 |
+
"loss": 0.1162,
|
601 |
"step": 2050
|
602 |
},
|
603 |
{
|
604 |
+
"epoch": 0.259375,
|
605 |
+
"grad_norm": 2.7352280616760254,
|
606 |
+
"learning_rate": 7.904000000000001e-06,
|
607 |
+
"loss": 0.1135,
|
608 |
"step": 2075
|
609 |
},
|
610 |
{
|
611 |
+
"epoch": 0.2625,
|
612 |
+
"grad_norm": 2.2932169437408447,
|
613 |
+
"learning_rate": 7.870666666666667e-06,
|
614 |
+
"loss": 0.1153,
|
615 |
"step": 2100
|
616 |
},
|
617 |
{
|
618 |
+
"epoch": 0.265625,
|
619 |
+
"grad_norm": 3.1734797954559326,
|
620 |
+
"learning_rate": 7.837333333333334e-06,
|
621 |
+
"loss": 0.1005,
|
622 |
"step": 2125
|
623 |
},
|
624 |
{
|
625 |
+
"epoch": 0.26875,
|
626 |
+
"grad_norm": 2.4353103637695312,
|
627 |
+
"learning_rate": 7.804e-06,
|
628 |
+
"loss": 0.0988,
|
629 |
"step": 2150
|
630 |
},
|
631 |
{
|
632 |
+
"epoch": 0.271875,
|
633 |
+
"grad_norm": 2.8655478954315186,
|
634 |
+
"learning_rate": 7.770666666666668e-06,
|
635 |
+
"loss": 0.1028,
|
636 |
"step": 2175
|
637 |
},
|
638 |
{
|
639 |
+
"epoch": 0.275,
|
640 |
+
"grad_norm": 3.800967216491699,
|
641 |
+
"learning_rate": 7.737333333333335e-06,
|
642 |
+
"loss": 0.1751,
|
643 |
"step": 2200
|
644 |
},
|
645 |
{
|
646 |
+
"epoch": 0.278125,
|
647 |
+
"grad_norm": 4.212419509887695,
|
648 |
+
"learning_rate": 7.704000000000001e-06,
|
649 |
+
"loss": 0.1798,
|
650 |
"step": 2225
|
651 |
},
|
652 |
{
|
653 |
+
"epoch": 0.28125,
|
654 |
+
"grad_norm": 3.5863020420074463,
|
655 |
+
"learning_rate": 7.670666666666668e-06,
|
656 |
+
"loss": 0.199,
|
657 |
"step": 2250
|
658 |
},
|
659 |
{
|
660 |
+
"epoch": 0.284375,
|
661 |
+
"grad_norm": 3.1013996601104736,
|
662 |
+
"learning_rate": 7.637333333333334e-06,
|
663 |
+
"loss": 0.1335,
|
664 |
"step": 2275
|
665 |
},
|
666 |
{
|
667 |
+
"epoch": 0.2875,
|
668 |
+
"grad_norm": 2.2462713718414307,
|
669 |
+
"learning_rate": 7.604e-06,
|
670 |
+
"loss": 0.0976,
|
671 |
"step": 2300
|
672 |
},
|
673 |
{
|
674 |
+
"epoch": 0.290625,
|
675 |
+
"grad_norm": 2.9669203758239746,
|
676 |
+
"learning_rate": 7.570666666666668e-06,
|
677 |
+
"loss": 0.0946,
|
678 |
"step": 2325
|
679 |
},
|
680 |
{
|
681 |
+
"epoch": 0.29375,
|
682 |
+
"grad_norm": 2.645289897918701,
|
683 |
+
"learning_rate": 7.537333333333334e-06,
|
684 |
+
"loss": 0.0935,
|
685 |
"step": 2350
|
686 |
},
|
687 |
{
|
688 |
+
"epoch": 0.296875,
|
689 |
+
"grad_norm": 1.9715274572372437,
|
690 |
+
"learning_rate": 7.5040000000000005e-06,
|
691 |
+
"loss": 0.1045,
|
692 |
"step": 2375
|
693 |
},
|
694 |
{
|
695 |
+
"epoch": 0.3,
|
696 |
+
"grad_norm": 2.1423373222351074,
|
697 |
+
"learning_rate": 7.470666666666667e-06,
|
698 |
+
"loss": 0.0977,
|
699 |
"step": 2400
|
700 |
},
|
701 |
{
|
702 |
+
"epoch": 0.303125,
|
703 |
+
"grad_norm": 2.029958963394165,
|
704 |
+
"learning_rate": 7.437333333333334e-06,
|
705 |
+
"loss": 0.1061,
|
706 |
"step": 2425
|
707 |
},
|
708 |
{
|
709 |
+
"epoch": 0.30625,
|
710 |
+
"grad_norm": 1.972732663154602,
|
711 |
+
"learning_rate": 7.404e-06,
|
712 |
+
"loss": 0.0998,
|
713 |
"step": 2450
|
714 |
},
|
715 |
{
|
716 |
+
"epoch": 0.309375,
|
717 |
+
"grad_norm": 2.2875239849090576,
|
718 |
+
"learning_rate": 7.370666666666667e-06,
|
719 |
+
"loss": 0.1068,
|
720 |
"step": 2475
|
721 |
},
|
722 |
{
|
723 |
+
"epoch": 0.3125,
|
724 |
+
"grad_norm": 3.1778981685638428,
|
725 |
+
"learning_rate": 7.337333333333334e-06,
|
726 |
+
"loss": 0.1168,
|
727 |
"step": 2500
|
728 |
},
|
729 |
{
|
730 |
+
"epoch": 0.315625,
|
731 |
+
"grad_norm": 3.360576868057251,
|
732 |
+
"learning_rate": 7.304000000000001e-06,
|
733 |
+
"loss": 0.1524,
|
734 |
"step": 2525
|
735 |
},
|
736 |
{
|
737 |
+
"epoch": 0.31875,
|
738 |
+
"grad_norm": 3.5467047691345215,
|
739 |
+
"learning_rate": 7.270666666666667e-06,
|
740 |
+
"loss": 0.1483,
|
741 |
"step": 2550
|
742 |
},
|
743 |
{
|
744 |
+
"epoch": 0.321875,
|
745 |
+
"grad_norm": 3.488696575164795,
|
746 |
+
"learning_rate": 7.237333333333334e-06,
|
747 |
+
"loss": 0.1775,
|
748 |
"step": 2575
|
749 |
},
|
750 |
{
|
751 |
+
"epoch": 0.325,
|
752 |
+
"grad_norm": 2.8800296783447266,
|
753 |
+
"learning_rate": 7.204000000000001e-06,
|
754 |
+
"loss": 0.135,
|
755 |
"step": 2600
|
756 |
},
|
757 |
{
|
758 |
+
"epoch": 0.328125,
|
759 |
+
"grad_norm": 3.1020660400390625,
|
760 |
+
"learning_rate": 7.170666666666667e-06,
|
761 |
+
"loss": 0.1108,
|
762 |
"step": 2625
|
763 |
},
|
764 |
{
|
765 |
+
"epoch": 0.33125,
|
766 |
+
"grad_norm": 2.1233720779418945,
|
767 |
+
"learning_rate": 7.137333333333334e-06,
|
768 |
+
"loss": 0.1002,
|
769 |
"step": 2650
|
770 |
},
|
771 |
{
|
772 |
+
"epoch": 0.334375,
|
773 |
+
"grad_norm": 2.393425703048706,
|
774 |
+
"learning_rate": 7.104000000000001e-06,
|
775 |
+
"loss": 0.0941,
|
776 |
"step": 2675
|
777 |
},
|
778 |
{
|
779 |
+
"epoch": 0.3375,
|
780 |
+
"grad_norm": 2.295924186706543,
|
781 |
+
"learning_rate": 7.0706666666666665e-06,
|
782 |
+
"loss": 0.0959,
|
783 |
"step": 2700
|
784 |
},
|
785 |
{
|
786 |
+
"epoch": 0.340625,
|
787 |
+
"grad_norm": 1.8125039339065552,
|
788 |
+
"learning_rate": 7.037333333333334e-06,
|
789 |
+
"loss": 0.1116,
|
790 |
"step": 2725
|
791 |
},
|
792 |
{
|
793 |
+
"epoch": 0.34375,
|
794 |
+
"grad_norm": 3.006834030151367,
|
795 |
+
"learning_rate": 7.004000000000001e-06,
|
796 |
+
"loss": 0.1146,
|
797 |
"step": 2750
|
798 |
},
|
799 |
{
|
800 |
+
"epoch": 0.346875,
|
801 |
+
"grad_norm": 4.171006679534912,
|
802 |
+
"learning_rate": 6.970666666666667e-06,
|
803 |
+
"loss": 0.2029,
|
804 |
"step": 2775
|
805 |
},
|
806 |
{
|
807 |
+
"epoch": 0.35,
|
808 |
+
"grad_norm": 3.68646240234375,
|
809 |
+
"learning_rate": 6.937333333333334e-06,
|
810 |
+
"loss": 0.1913,
|
811 |
"step": 2800
|
812 |
},
|
813 |
{
|
814 |
+
"epoch": 0.353125,
|
815 |
+
"grad_norm": 3.7463300228118896,
|
816 |
+
"learning_rate": 6.904e-06,
|
817 |
+
"loss": 0.16,
|
818 |
"step": 2825
|
819 |
},
|
820 |
{
|
821 |
+
"epoch": 0.35625,
|
822 |
+
"grad_norm": 3.069136381149292,
|
823 |
+
"learning_rate": 6.8706666666666676e-06,
|
824 |
+
"loss": 0.1571,
|
825 |
"step": 2850
|
826 |
},
|
827 |
{
|
828 |
+
"epoch": 0.359375,
|
829 |
+
"grad_norm": 3.17172908782959,
|
830 |
+
"learning_rate": 6.837333333333334e-06,
|
831 |
+
"loss": 0.1608,
|
832 |
"step": 2875
|
833 |
},
|
834 |
{
|
835 |
+
"epoch": 0.3625,
|
836 |
+
"grad_norm": 3.1673102378845215,
|
837 |
+
"learning_rate": 6.804e-06,
|
838 |
+
"loss": 0.1546,
|
839 |
"step": 2900
|
840 |
},
|
841 |
{
|
842 |
+
"epoch": 0.365625,
|
843 |
+
"grad_norm": 2.344193935394287,
|
844 |
+
"learning_rate": 6.770666666666668e-06,
|
845 |
+
"loss": 0.1282,
|
846 |
"step": 2925
|
847 |
},
|
848 |
{
|
849 |
+
"epoch": 0.36875,
|
850 |
+
"grad_norm": 2.5321226119995117,
|
851 |
+
"learning_rate": 6.737333333333333e-06,
|
852 |
+
"loss": 0.0979,
|
853 |
"step": 2950
|
854 |
},
|
855 |
{
|
856 |
+
"epoch": 0.371875,
|
857 |
+
"grad_norm": 2.2652363777160645,
|
858 |
+
"learning_rate": 6.7040000000000005e-06,
|
859 |
+
"loss": 0.1049,
|
860 |
"step": 2975
|
861 |
},
|
862 |
{
|
863 |
+
"epoch": 0.375,
|
864 |
+
"grad_norm": 2.7856993675231934,
|
865 |
+
"learning_rate": 6.670666666666668e-06,
|
866 |
+
"loss": 0.1433,
|
867 |
"step": 3000
|
868 |
},
|
869 |
{
|
870 |
+
"epoch": 0.375,
|
871 |
+
"eval_loss": 0.2720916271209717,
|
872 |
+
"eval_runtime": 151.7576,
|
873 |
+
"eval_samples_per_second": 13.864,
|
874 |
+
"eval_steps_per_second": 0.87,
|
875 |
+
"eval_wer": 13.9010937646069,
|
876 |
"step": 3000
|
877 |
},
|
878 |
{
|
879 |
+
"epoch": 0.378125,
|
880 |
+
"grad_norm": 4.214677810668945,
|
881 |
+
"learning_rate": 6.637333333333333e-06,
|
882 |
+
"loss": 0.1758,
|
883 |
"step": 3025
|
884 |
},
|
885 |
{
|
886 |
+
"epoch": 0.38125,
|
887 |
+
"grad_norm": 4.144543647766113,
|
888 |
+
"learning_rate": 6.604000000000001e-06,
|
889 |
+
"loss": 0.1972,
|
890 |
"step": 3050
|
891 |
},
|
892 |
{
|
893 |
+
"epoch": 0.384375,
|
894 |
+
"grad_norm": 2.1775295734405518,
|
895 |
+
"learning_rate": 6.570666666666667e-06,
|
896 |
+
"loss": 0.1293,
|
897 |
"step": 3075
|
898 |
},
|
899 |
{
|
900 |
+
"epoch": 0.3875,
|
901 |
+
"grad_norm": 2.796152353286743,
|
902 |
+
"learning_rate": 6.537333333333334e-06,
|
903 |
+
"loss": 0.099,
|
904 |
"step": 3100
|
905 |
},
|
906 |
{
|
907 |
+
"epoch": 0.390625,
|
908 |
+
"grad_norm": 2.1920204162597656,
|
909 |
+
"learning_rate": 6.504e-06,
|
910 |
+
"loss": 0.0945,
|
911 |
"step": 3125
|
912 |
},
|
913 |
{
|
914 |
+
"epoch": 0.39375,
|
915 |
+
"grad_norm": 2.8689582347869873,
|
916 |
+
"learning_rate": 6.470666666666667e-06,
|
917 |
+
"loss": 0.1118,
|
918 |
"step": 3150
|
919 |
},
|
920 |
{
|
921 |
+
"epoch": 0.396875,
|
922 |
+
"grad_norm": 3.580993175506592,
|
923 |
+
"learning_rate": 6.4373333333333344e-06,
|
924 |
+
"loss": 0.1732,
|
925 |
"step": 3175
|
926 |
},
|
927 |
{
|
928 |
+
"epoch": 0.4,
|
929 |
+
"grad_norm": 3.9165573120117188,
|
930 |
+
"learning_rate": 6.404e-06,
|
931 |
+
"loss": 0.1581,
|
932 |
"step": 3200
|
933 |
},
|
934 |
{
|
935 |
+
"epoch": 0.403125,
|
936 |
+
"grad_norm": 3.8235292434692383,
|
937 |
+
"learning_rate": 6.370666666666667e-06,
|
938 |
+
"loss": 0.1716,
|
939 |
"step": 3225
|
940 |
},
|
941 |
{
|
942 |
+
"epoch": 0.40625,
|
943 |
+
"grad_norm": 3.21138072013855,
|
944 |
+
"learning_rate": 6.3373333333333345e-06,
|
945 |
+
"loss": 0.1364,
|
946 |
"step": 3250
|
947 |
},
|
948 |
{
|
949 |
+
"epoch": 0.409375,
|
950 |
+
"grad_norm": 3.925539255142212,
|
951 |
+
"learning_rate": 6.304e-06,
|
952 |
+
"loss": 0.1459,
|
953 |
"step": 3275
|
954 |
},
|
955 |
{
|
956 |
+
"epoch": 0.4125,
|
957 |
+
"grad_norm": 3.062764883041382,
|
958 |
+
"learning_rate": 6.270666666666667e-06,
|
959 |
+
"loss": 0.1668,
|
960 |
"step": 3300
|
961 |
},
|
962 |
{
|
963 |
+
"epoch": 0.415625,
|
964 |
+
"grad_norm": 2.8379392623901367,
|
965 |
+
"learning_rate": 6.237333333333334e-06,
|
966 |
+
"loss": 0.1243,
"step": 3325 },
    { "epoch": 0.41875, "grad_norm": 2.979661226272583, "learning_rate": 6.204e-06, "loss": 0.0979, "step": 3350 },
    { "epoch": 0.421875, "grad_norm": 2.4838883876800537, "learning_rate": 6.170666666666667e-06, "loss": 0.0848, "step": 3375 },
    { "epoch": 0.425, "grad_norm": 2.3293073177337646, "learning_rate": 6.137333333333334e-06, "loss": 0.0927, "step": 3400 },
    { "epoch": 0.428125, "grad_norm": 3.3497400283813477, "learning_rate": 6.104000000000001e-06, "loss": 0.0976, "step": 3425 },
    { "epoch": 0.43125, "grad_norm": 2.0302255153656006, "learning_rate": 6.070666666666667e-06, "loss": 0.0881, "step": 3450 },
    { "epoch": 0.434375, "grad_norm": 2.112396001815796, "learning_rate": 6.037333333333334e-06, "loss": 0.0828, "step": 3475 },
    { "epoch": 0.4375, "grad_norm": 2.513197183609009, "learning_rate": 6.004000000000001e-06, "loss": 0.0983, "step": 3500 },
    { "epoch": 0.440625, "grad_norm": 2.1429622173309326, "learning_rate": 5.970666666666667e-06, "loss": 0.0929, "step": 3525 },
    { "epoch": 0.44375, "grad_norm": 2.7300236225128174, "learning_rate": 5.937333333333334e-06, "loss": 0.0916, "step": 3550 },
    { "epoch": 0.446875, "grad_norm": 4.011541366577148, "learning_rate": 5.9040000000000006e-06, "loss": 0.1426, "step": 3575 },
    { "epoch": 0.45, "grad_norm": 3.1994545459747314, "learning_rate": 5.870666666666667e-06, "loss": 0.163, "step": 3600 },
    { "epoch": 0.453125, "grad_norm": 2.98388934135437, "learning_rate": 5.837333333333333e-06, "loss": 0.1568, "step": 3625 },
    { "epoch": 0.45625, "grad_norm": 2.4515798091888428, "learning_rate": 5.804000000000001e-06, "loss": 0.0937, "step": 3650 },
    { "epoch": 0.459375, "grad_norm": 2.0767834186553955, "learning_rate": 5.770666666666666e-06, "loss": 0.0861, "step": 3675 },
    { "epoch": 0.4625, "grad_norm": 2.601104974746704, "learning_rate": 5.7373333333333335e-06, "loss": 0.0917, "step": 3700 },
    { "epoch": 0.465625, "grad_norm": 2.593489408493042, "learning_rate": 5.704000000000001e-06, "loss": 0.1022, "step": 3725 },
    { "epoch": 0.46875, "grad_norm": 3.5832834243774414, "learning_rate": 5.670666666666668e-06, "loss": 0.1304, "step": 3750 },
    { "epoch": 0.471875, "grad_norm": 3.4403560161590576, "learning_rate": 5.637333333333334e-06, "loss": 0.1634, "step": 3775 },
    { "epoch": 0.475, "grad_norm": 3.6842737197875977, "learning_rate": 5.604000000000001e-06, "loss": 0.1683, "step": 3800 },
    { "epoch": 0.478125, "grad_norm": 3.8382315635681152, "learning_rate": 5.570666666666667e-06, "loss": 0.1538, "step": 3825 },
    { "epoch": 0.48125, "grad_norm": 4.207257270812988, "learning_rate": 5.537333333333334e-06, "loss": 0.165, "step": 3850 },
    { "epoch": 0.484375, "grad_norm": 2.4130444526672363, "learning_rate": 5.504e-06, "loss": 0.1558, "step": 3875 },
    { "epoch": 0.4875, "grad_norm": 2.3981151580810547, "learning_rate": 5.4706666666666674e-06, "loss": 0.1096, "step": 3900 },
    { "epoch": 0.490625, "grad_norm": 2.2837915420532227, "learning_rate": 5.437333333333333e-06, "loss": 0.0937, "step": 3925 },
    { "epoch": 0.49375, "grad_norm": 2.6647775173187256, "learning_rate": 5.404e-06, "loss": 0.0876, "step": 3950 },
    { "epoch": 0.496875, "grad_norm": 3.7677643299102783, "learning_rate": 5.3706666666666675e-06, "loss": 0.15, "step": 3975 },
    { "epoch": 0.5, "grad_norm": 3.542175769805908, "learning_rate": 5.337333333333333e-06, "loss": 0.1925, "step": 4000 },
    { "epoch": 0.5, "eval_loss": 0.25648659467697144, "eval_runtime": 150.6646, "eval_samples_per_second": 13.965, "eval_steps_per_second": 0.876, "eval_wer": 12.7372160418809, "step": 4000 },
    { "epoch": 0.503125, "grad_norm": 2.5672571659088135, "learning_rate": 5.304e-06, "loss": 0.1434, "step": 4025 },
    { "epoch": 0.50625, "grad_norm": 4.591808319091797, "learning_rate": 5.270666666666668e-06, "loss": 0.2075, "step": 4050 },
    { "epoch": 0.509375, "grad_norm": 3.485185146331787, "learning_rate": 5.237333333333334e-06, "loss": 0.1478, "step": 4075 },
    { "epoch": 0.5125, "grad_norm": 2.5995991230010986, "learning_rate": 5.2040000000000005e-06, "loss": 0.1383, "step": 4100 },
    { "epoch": 0.515625, "grad_norm": 2.4682819843292236, "learning_rate": 5.170666666666667e-06, "loss": 0.0959, "step": 4125 },
    { "epoch": 0.51875, "grad_norm": 2.436518669128418, "learning_rate": 5.137333333333334e-06, "loss": 0.0857, "step": 4150 },
    { "epoch": 0.521875, "grad_norm": 2.0344107151031494, "learning_rate": 5.104e-06, "loss": 0.0862, "step": 4175 },
    { "epoch": 0.525, "grad_norm": 1.6771937608718872, "learning_rate": 5.070666666666667e-06, "loss": 0.0808, "step": 4200 },
    { "epoch": 0.528125, "grad_norm": 1.7831439971923828, "learning_rate": 5.037333333333334e-06, "loss": 0.0872, "step": 4225 },
    { "epoch": 0.53125, "grad_norm": 2.228795051574707, "learning_rate": 5.004e-06, "loss": 0.0832, "step": 4250 },
    { "epoch": 0.534375, "grad_norm": 3.1402647495269775, "learning_rate": 4.970666666666667e-06, "loss": 0.0927, "step": 4275 },
    { "epoch": 0.5375, "grad_norm": 3.662506580352783, "learning_rate": 4.937333333333334e-06, "loss": 0.1477, "step": 4300 },
    { "epoch": 0.540625, "grad_norm": 2.865934371948242, "learning_rate": 4.904000000000001e-06, "loss": 0.1262, "step": 4325 },
    { "epoch": 0.54375, "grad_norm": 3.2233200073242188, "learning_rate": 4.870666666666667e-06, "loss": 0.1329, "step": 4350 },
    { "epoch": 0.546875, "grad_norm": 2.093703269958496, "learning_rate": 4.837333333333334e-06, "loss": 0.0795, "step": 4375 },
    { "epoch": 0.55, "grad_norm": 1.7601807117462158, "learning_rate": 4.804e-06, "loss": 0.0715, "step": 4400 },
    { "epoch": 0.553125, "grad_norm": 2.1606643199920654, "learning_rate": 4.770666666666667e-06, "loss": 0.0797, "step": 4425 },
    { "epoch": 0.55625, "grad_norm": 2.565343141555786, "learning_rate": 4.737333333333334e-06, "loss": 0.0883, "step": 4450 },
    { "epoch": 0.559375, "grad_norm": 2.062619924545288, "learning_rate": 4.704e-06, "loss": 0.0965, "step": 4475 },
    { "epoch": 0.5625, "grad_norm": 2.2219879627227783, "learning_rate": 4.6706666666666675e-06, "loss": 0.0891, "step": 4500 },
    { "epoch": 0.565625, "grad_norm": 2.857029676437378, "learning_rate": 4.637333333333334e-06, "loss": 0.1147, "step": 4525 },
    { "epoch": 0.56875, "grad_norm": 3.090247392654419, "learning_rate": 4.604e-06, "loss": 0.144, "step": 4550 },
    { "epoch": 0.571875, "grad_norm": 3.8906264305114746, "learning_rate": 4.570666666666667e-06, "loss": 0.1451, "step": 4575 },
    { "epoch": 0.575, "grad_norm": 3.7733590602874756, "learning_rate": 4.537333333333334e-06, "loss": 0.1475, "step": 4600 },
    { "epoch": 0.578125, "grad_norm": 3.379163980484009, "learning_rate": 4.504e-06, "loss": 0.1509, "step": 4625 },
    { "epoch": 0.58125, "grad_norm": 3.4210824966430664, "learning_rate": 4.470666666666667e-06, "loss": 0.1444, "step": 4650 },
    { "epoch": 0.584375, "grad_norm": 3.7809910774230957, "learning_rate": 4.437333333333333e-06, "loss": 0.1295, "step": 4675 },
    { "epoch": 0.5875, "grad_norm": 2.537574052810669, "learning_rate": 4.4040000000000005e-06, "loss": 0.1158, "step": 4700 },
    { "epoch": 0.590625, "grad_norm": 3.482285261154175, "learning_rate": 4.370666666666667e-06, "loss": 0.1249, "step": 4725 },
    { "epoch": 0.59375, "grad_norm": 3.0114011764526367, "learning_rate": 4.337333333333334e-06, "loss": 0.1238, "step": 4750 },
    { "epoch": 0.596875, "grad_norm": 2.117215394973755, "learning_rate": 4.304000000000001e-06, "loss": 0.0888, "step": 4775 },
    { "epoch": 0.6, "grad_norm": 2.0158379077911377, "learning_rate": 4.270666666666667e-06, "loss": 0.0972, "step": 4800 },
    { "epoch": 0.603125, "grad_norm": 2.5208640098571777, "learning_rate": 4.2373333333333335e-06, "loss": 0.0793, "step": 4825 },
    { "epoch": 0.60625, "grad_norm": 2.820002555847168, "learning_rate": 4.204e-06, "loss": 0.1035, "step": 4850 },
    { "epoch": 0.609375, "grad_norm": 3.1144282817840576, "learning_rate": 4.170666666666667e-06, "loss": 0.1128, "step": 4875 },
    { "epoch": 0.6125, "grad_norm": 3.1345527172088623, "learning_rate": 4.137333333333334e-06, "loss": 0.1217, "step": 4900 },
    { "epoch": 0.615625, "grad_norm": 2.2702696323394775, "learning_rate": 4.104e-06, "loss": 0.1061, "step": 4925 },
    { "epoch": 0.61875, "grad_norm": 2.714102268218994, "learning_rate": 4.072e-06, "loss": 0.0919, "step": 4950 },
    { "epoch": 0.621875, "grad_norm": 2.448854923248291, "learning_rate": 4.0386666666666666e-06, "loss": 0.0855, "step": 4975 },
    { "epoch": 0.625, "grad_norm": 2.9392127990722656, "learning_rate": 4.005333333333334e-06, "loss": 0.0818, "step": 5000 },
    { "epoch": 0.625, "eval_loss": 0.2562941014766693, "eval_runtime": 160.0125, "eval_samples_per_second": 13.149, "eval_steps_per_second": 0.825, "eval_wer": 11.942600729176405, "step": 5000 },
    { "epoch": 0.628125, "grad_norm": 2.4964210987091064, "learning_rate": 3.972e-06, "loss": 0.1203, "step": 5025 },
    { "epoch": 0.63125, "grad_norm": 3.330078125, "learning_rate": 3.938666666666667e-06, "loss": 0.111, "step": 5050 },
    { "epoch": 0.634375, "grad_norm": 3.6872191429138184, "learning_rate": 3.905333333333334e-06, "loss": 0.164, "step": 5075 },
    { "epoch": 0.6375, "grad_norm": 3.728769063949585, "learning_rate": 3.872e-06, "loss": 0.1515, "step": 5100 },
    { "epoch": 0.640625, "grad_norm": 3.4183156490325928, "learning_rate": 3.838666666666667e-06, "loss": 0.1334, "step": 5125 },
    { "epoch": 0.64375, "grad_norm": 3.4580440521240234, "learning_rate": 3.8053333333333336e-06, "loss": 0.134, "step": 5150 },
    { "epoch": 0.646875, "grad_norm": 2.2719855308532715, "learning_rate": 3.772e-06, "loss": 0.1088, "step": 5175 },
    { "epoch": 0.65, "grad_norm": 2.3186910152435303, "learning_rate": 3.7386666666666673e-06, "loss": 0.0724, "step": 5200 },
    { "epoch": 0.653125, "grad_norm": 1.8175565004348755, "learning_rate": 3.7053333333333337e-06, "loss": 0.0759, "step": 5225 },
    { "epoch": 0.65625, "grad_norm": 2.0874826908111572, "learning_rate": 3.6720000000000006e-06, "loss": 0.0813, "step": 5250 },
    { "epoch": 0.659375, "grad_norm": 1.9950120449066162, "learning_rate": 3.638666666666667e-06, "loss": 0.0824, "step": 5275 },
    { "epoch": 0.6625, "grad_norm": 2.6349194049835205, "learning_rate": 3.6053333333333334e-06, "loss": 0.0835, "step": 5300 },
    { "epoch": 0.665625, "grad_norm": 2.7667415142059326, "learning_rate": 3.5720000000000003e-06, "loss": 0.0823, "step": 5325 },
    { "epoch": 0.66875, "grad_norm": 3.617748260498047, "learning_rate": 3.538666666666667e-06, "loss": 0.1077, "step": 5350 },
    { "epoch": 0.671875, "grad_norm": 3.2603073120117188, "learning_rate": 3.5053333333333335e-06, "loss": 0.1268, "step": 5375 },
    { "epoch": 0.675, "grad_norm": 2.9681355953216553, "learning_rate": 3.4720000000000004e-06, "loss": 0.1206, "step": 5400 },
    { "epoch": 0.678125, "grad_norm": 4.156548500061035, "learning_rate": 3.438666666666667e-06, "loss": 0.1279, "step": 5425 },
    { "epoch": 0.68125, "grad_norm": 3.2013888359069824, "learning_rate": 3.4053333333333337e-06, "loss": 0.1177, "step": 5450 },
    { "epoch": 0.684375, "grad_norm": 3.299403190612793, "learning_rate": 3.372e-06, "loss": 0.0946, "step": 5475 },
    { "epoch": 0.6875, "grad_norm": 2.39630389213562, "learning_rate": 3.338666666666667e-06, "loss": 0.0944, "step": 5500 },
    { "epoch": 0.690625, "grad_norm": 3.7624928951263428, "learning_rate": 3.3053333333333338e-06, "loss": 0.1149, "step": 5525 },
    { "epoch": 0.69375, "grad_norm": 3.3170886039733887, "learning_rate": 3.272e-06, "loss": 0.1373, "step": 5550 },
    { "epoch": 0.696875, "grad_norm": 2.2296531200408936, "learning_rate": 3.238666666666667e-06, "loss": 0.1056, "step": 5575 },
    { "epoch": 0.7, "grad_norm": 1.8995999097824097, "learning_rate": 3.2053333333333334e-06, "loss": 0.0724, "step": 5600 },
    { "epoch": 0.703125, "grad_norm": 2.3782520294189453, "learning_rate": 3.172e-06, "loss": 0.0604, "step": 5625 },
    { "epoch": 0.70625, "grad_norm": 2.2558810710906982, "learning_rate": 3.138666666666667e-06, "loss": 0.0581, "step": 5650 },
    { "epoch": 0.709375, "grad_norm": 2.4040448665618896, "learning_rate": 3.1053333333333336e-06, "loss": 0.0713, "step": 5675 },
    { "epoch": 0.7125, "grad_norm": 2.5696732997894287, "learning_rate": 3.072e-06, "loss": 0.0773, "step": 5700 },
    { "epoch": 0.715625, "grad_norm": 2.237166404724121, "learning_rate": 3.038666666666667e-06, "loss": 0.0765, "step": 5725 },
    { "epoch": 0.71875, "grad_norm": 1.8783671855926514, "learning_rate": 3.0053333333333332e-06, "loss": 0.0779, "step": 5750 },
    { "epoch": 0.721875, "grad_norm": 2.096334457397461, "learning_rate": 2.9720000000000005e-06, "loss": 0.0751, "step": 5775 },
    { "epoch": 0.725, "grad_norm": 2.0362164974212646, "learning_rate": 2.938666666666667e-06, "loss": 0.0711, "step": 5800 },
    { "epoch": 0.728125, "grad_norm": 1.7136311531066895, "learning_rate": 2.9053333333333334e-06, "loss": 0.0635, "step": 5825 },
    { "epoch": 0.73125, "grad_norm": 2.754848003387451, "learning_rate": 2.872e-06, "loss": 0.0698, "step": 5850 },
    { "epoch": 0.734375, "grad_norm": 2.058065176010132, "learning_rate": 2.8386666666666666e-06, "loss": 0.0741, "step": 5875 },
    { "epoch": 0.7375, "grad_norm": 3.0389583110809326, "learning_rate": 2.805333333333334e-06, "loss": 0.0938, "step": 5900 },
    { "epoch": 0.740625, "grad_norm": 3.4811720848083496, "learning_rate": 2.7720000000000003e-06, "loss": 0.1387, "step": 5925 },
    { "epoch": 0.74375, "grad_norm": 3.2388477325439453, "learning_rate": 2.7386666666666667e-06, "loss": 0.1283, "step": 5950 },
    { "epoch": 0.746875, "grad_norm": 3.083925247192383, "learning_rate": 2.7053333333333336e-06, "loss": 0.1073, "step": 5975 },
    { "epoch": 0.75, "grad_norm": 2.6847918033599854, "learning_rate": 2.672e-06, "loss": 0.1038, "step": 6000 },
    { "epoch": 0.75, "eval_loss": 0.23902159929275513, "eval_runtime": 158.0693, "eval_samples_per_second": 13.311, "eval_steps_per_second": 0.835, "eval_wer": 11.07319809292325, "step": 6000 },
    { "epoch": 0.753125, "grad_norm": 2.7315189838409424, "learning_rate": 2.6386666666666673e-06, "loss": 0.0987, "step": 6025 },
    { "epoch": 0.75625, "grad_norm": 2.3389735221862793, "learning_rate": 2.6053333333333337e-06, "loss": 0.0858, "step": 6050 },
    { "epoch": 0.759375, "grad_norm": 1.982534646987915, "learning_rate": 2.572e-06, "loss": 0.0764, "step": 6075 },
    { "epoch": 0.7625, "grad_norm": 1.9040074348449707, "learning_rate": 2.538666666666667e-06, "loss": 0.0731, "step": 6100 },
    { "epoch": 0.765625, "grad_norm": 2.654710054397583, "learning_rate": 2.5053333333333334e-06, "loss": 0.0758, "step": 6125 },
    { "epoch": 0.76875, "grad_norm": 2.6400296688079834, "learning_rate": 2.4720000000000002e-06, "loss": 0.0824, "step": 6150 },
    { "epoch": 0.771875, "grad_norm": 7.269197463989258, "learning_rate": 2.438666666666667e-06, "loss": 0.0822, "step": 6175 },
    { "epoch": 0.775, "grad_norm": 2.363656520843506, "learning_rate": 2.4053333333333335e-06, "loss": 0.0818, "step": 6200 },
    { "epoch": 0.778125, "grad_norm": 2.4660115242004395, "learning_rate": 2.3720000000000003e-06, "loss": 0.0768, "step": 6225 },
    { "epoch": 0.78125, "grad_norm": 3.3116371631622314, "learning_rate": 2.3386666666666668e-06, "loss": 0.0783, "step": 6250 },
    { "epoch": 0.784375, "grad_norm": 2.595853090286255, "learning_rate": 2.3053333333333336e-06, "loss": 0.0899, "step": 6275 },
    { "epoch": 0.7875, "grad_norm": 2.709597587585449, "learning_rate": 2.2720000000000004e-06, "loss": 0.0953, "step": 6300 },
    { "epoch": 0.790625, "grad_norm": 2.4446637630462646, "learning_rate": 2.238666666666667e-06, "loss": 0.1249, "step": 6325 },
    { "epoch": 0.79375, "grad_norm": 3.4412341117858887, "learning_rate": 2.2053333333333333e-06, "loss": 0.1171, "step": 6350 },
    { "epoch": 0.796875, "grad_norm": 2.2719008922576904, "learning_rate": 2.172e-06, "loss": 0.1065, "step": 6375 },
    { "epoch": 0.8, "grad_norm": 1.9873290061950684, "learning_rate": 2.138666666666667e-06, "loss": 0.0872, "step": 6400 },
    { "epoch": 0.803125, "grad_norm": 2.487403392791748, "learning_rate": 2.1053333333333334e-06, "loss": 0.0765, "step": 6425 },
    { "epoch": 0.80625, "grad_norm": 2.4424736499786377, "learning_rate": 2.0720000000000002e-06, "loss": 0.0736, "step": 6450 },
    { "epoch": 0.809375, "grad_norm": 3.1507577896118164, "learning_rate": 2.0386666666666667e-06, "loss": 0.1064, "step": 6475 },
    { "epoch": 0.8125, "grad_norm": 2.6285648345947266, "learning_rate": 2.0053333333333335e-06, "loss": 0.0993, "step": 6500 },
    { "epoch": 0.815625, "grad_norm": 4.1934967041015625, "learning_rate": 1.972e-06, "loss": 0.1299, "step": 6525 },
    { "epoch": 0.81875, "grad_norm": 3.031852960586548, "learning_rate": 1.9386666666666668e-06, "loss": 0.1195, "step": 6550 },
    { "epoch": 0.821875, "grad_norm": 2.9288837909698486, "learning_rate": 1.9053333333333334e-06, "loss": 0.1197, "step": 6575 },
    { "epoch": 0.825, "grad_norm": 2.890054225921631, "learning_rate": 1.8720000000000002e-06, "loss": 0.1127, "step": 6600 },
    { "epoch": 0.828125, "grad_norm": 3.130406618118286, "learning_rate": 1.8386666666666669e-06, "loss": 0.1155, "step": 6625 },
    { "epoch": 0.83125, "grad_norm": 2.7169485092163086, "learning_rate": 1.8053333333333333e-06, "loss": 0.1291, "step": 6650 },
    { "epoch": 0.834375, "grad_norm": 2.7390034198760986, "learning_rate": 1.7720000000000001e-06, "loss": 0.1097, "step": 6675 },
    { "epoch": 0.8375, "grad_norm": 2.161604166030884, "learning_rate": 1.7386666666666668e-06, "loss": 0.1022, "step": 6700 },
    { "epoch": 0.840625, "grad_norm": 2.210451126098633, "learning_rate": 1.7053333333333336e-06, "loss": 0.0779, "step": 6725 },
    { "epoch": 0.84375, "grad_norm": 2.426438808441162, "learning_rate": 1.672e-06, "loss": 0.0728, "step": 6750 },
    { "epoch": 0.846875, "grad_norm": 2.8744237422943115, "learning_rate": 1.6386666666666667e-06, "loss": 0.0859, "step": 6775 },
    { "epoch": 0.85, "grad_norm": 2.8165483474731445, "learning_rate": 1.6053333333333335e-06, "loss": 0.1496, "step": 6800 },
    { "epoch": 0.853125, "grad_norm": 4.0077738761901855, "learning_rate": 1.5720000000000002e-06, "loss": 0.1343, "step": 6825 },
    { "epoch": 0.85625, "grad_norm": 3.8011586666107178, "learning_rate": 1.538666666666667e-06, "loss": 0.1397, "step": 6850 },
    { "epoch": 0.859375, "grad_norm": 2.7379047870635986, "learning_rate": 1.5053333333333334e-06, "loss": 0.1262, "step": 6875 },
    { "epoch": 0.8625, "grad_norm": 3.250950574874878, "learning_rate": 1.472e-06, "loss": 0.1188, "step": 6900 },
    { "epoch": 0.865625, "grad_norm": 2.782945156097412, "learning_rate": 1.438666666666667e-06, "loss": 0.1103, "step": 6925 },
    { "epoch": 0.86875, "grad_norm": 3.08154034614563, "learning_rate": 1.4053333333333335e-06, "loss": 0.1147, "step": 6950 },
    { "epoch": 0.871875, "grad_norm": 3.5768070220947266, "learning_rate": 1.372e-06, "loss": 0.1332, "step": 6975 },
    { "epoch": 0.875, "grad_norm": 3.155341863632202, "learning_rate": 1.3386666666666668e-06, "loss": 0.1282, "step": 7000 },
    { "epoch": 0.875, "eval_loss": 0.23438745737075806, "eval_runtime": 154.8314, "eval_samples_per_second": 13.589, "eval_steps_per_second": 0.853, "eval_wer": 11.391044218005048, "step": 7000 },
    { "epoch": 0.878125, "grad_norm": 11.062019348144531, "learning_rate": 1.308e-06, "loss": 0.2406, "step": 7025 },
    { "epoch": 0.88125, "grad_norm": 4.648179531097412, "learning_rate": 1.2746666666666669e-06, "loss": 0.3469, "step": 7050 },
    { "epoch": 0.884375, "grad_norm": 4.388245105743408, "learning_rate": 1.2413333333333335e-06, "loss": 0.3421, "step": 7075 },
    { "epoch": 0.8875, "grad_norm": 4.806427478790283, "learning_rate": 1.2080000000000001e-06, "loss": 0.2847, "step": 7100 },
    { "epoch": 0.890625, "grad_norm": 3.0818049907684326, "learning_rate": 1.1746666666666668e-06, "loss": 0.1671, "step": 7125 },
    { "epoch": 0.89375, "grad_norm": 4.117819309234619, "learning_rate": 1.1413333333333334e-06, "loss": 0.1313, "step": 7150 },
    { "epoch": 0.896875, "grad_norm": 2.8558835983276367, "learning_rate": 1.108e-06, "loss": 0.1177, "step": 7175 },
    { "epoch": 0.9, "grad_norm": 3.0425021648406982, "learning_rate": 1.0746666666666669e-06, "loss": 0.0911, "step": 7200 },
    { "epoch": 0.903125, "grad_norm": 2.6587588787078857, "learning_rate": 1.0413333333333333e-06, "loss": 0.0898, "step": 7225 },
    { "epoch": 0.90625, "grad_norm": 1.7572664022445679, "learning_rate": 1.0080000000000001e-06, "loss": 0.0922, "step": 7250 },
    { "epoch": 0.909375, "grad_norm": 2.00393009185791, "learning_rate": 9.746666666666668e-07, "loss": 0.0753, "step": 7275 },
    { "epoch": 0.9125, "grad_norm": 1.845981478691101, "learning_rate": 9.413333333333334e-07, "loss": 0.0628, "step": 7300 },
    { "epoch": 0.915625, "grad_norm": 2.008112907409668, "learning_rate": 9.080000000000001e-07, "loss": 0.0696, "step": 7325 },
    { "epoch": 0.91875, "grad_norm": 2.837357759475708, "learning_rate": 8.746666666666668e-07, "loss": 0.0897, "step": 7350 },
    { "epoch": 0.921875, "grad_norm": 2.4842417240142822, "learning_rate": 8.413333333333334e-07, "loss": 0.1227, "step": 7375 },
    { "epoch": 0.925, "grad_norm": 2.7866716384887695, "learning_rate": 8.08e-07, "loss": 0.1012, "step": 7400 },
    { "epoch": 0.928125, "grad_norm": 2.1826930046081543, "learning_rate": 7.746666666666668e-07, "loss": 0.1141, "step": 7425 },
    { "epoch": 0.93125, "grad_norm": 2.014090061187744, "learning_rate": 7.413333333333333e-07, "loss": 0.0754, "step": 7450 },
    { "epoch": 0.934375, "grad_norm": 2.1539175510406494, "learning_rate": 7.08e-07, "loss": 0.0736, "step": 7475 },
    { "epoch": 0.9375, "grad_norm": 2.712541341781616, "learning_rate": 6.746666666666667e-07, "loss": 0.0684, "step": 7500 },
    { "epoch": 0.940625, "grad_norm": 3.281242847442627, "learning_rate": 6.413333333333334e-07, "loss": 0.1414, "step": 7525 },
    { "epoch": 0.94375, "grad_norm": 4.088025093078613, "learning_rate": 6.08e-07, "loss": 0.1895, "step": 7550 },
    { "epoch": 0.946875, "grad_norm": 4.144560813903809, "learning_rate": 5.746666666666667e-07, "loss": 0.222, "step": 7575 },
    { "epoch": 0.95, "grad_norm": 1.8468823432922363, "learning_rate": 5.413333333333334e-07, "loss": 0.1349, "step": 7600 },
    { "epoch": 0.953125, "grad_norm": 2.5354621410369873, "learning_rate": 5.08e-07, "loss": 0.0872, "step": 7625 },
    { "epoch": 0.95625, "grad_norm": 1.83882737159729, "learning_rate": 4.746666666666667e-07, "loss": 0.0725, "step": 7650 },
    { "epoch": 0.959375, "grad_norm": 3.42556095123291, "learning_rate": 4.413333333333333e-07, "loss": 0.0988, "step": 7675 },
    { "epoch": 0.9625, "grad_norm": 2.682558059692383, "learning_rate": 4.0800000000000005e-07, "loss": 0.1166, "step": 7700 },
    { "epoch": 0.965625, "grad_norm": 3.2471797466278076, "learning_rate": 3.7466666666666674e-07, "loss": 0.1257, "step": 7725 },
    { "epoch": 0.96875, "grad_norm": 2.4202020168304443, "learning_rate": 3.4133333333333337e-07, "loss": 0.1114, "step": 7750 },
    { "epoch": 0.971875, "grad_norm": 2.8282711505889893, "learning_rate": 3.0800000000000006e-07, "loss": 0.0811, "step": 7775 },
    { "epoch": 0.975, "grad_norm": 4.20676326751709, "learning_rate": 2.746666666666667e-07, "loss": 0.104, "step": 7800 },
    { "epoch": 0.978125, "grad_norm": 4.955998420715332, "learning_rate": 2.413333333333333e-07, "loss": 0.2773, "step": 7825 },
    { "epoch": 0.98125, "grad_norm": 2.0168468952178955, "learning_rate": 2.08e-07, "loss": 0.1105, "step": 7850 },
    { "epoch": 0.984375, "grad_norm": 1.6335862874984741, "learning_rate": 1.7466666666666667e-07, "loss": 0.0808, "step": 7875 },
    { "epoch": 0.9875, "grad_norm": 2.269954204559326, "learning_rate": 1.4133333333333333e-07, "loss": 0.0786, "step": 7900 },
    { "epoch": 0.990625, "grad_norm": 2.0813560485839844, "learning_rate": 1.0800000000000001e-07, "loss": 0.0801, "step": 7925 },
    { "epoch": 0.99375, "grad_norm": 1.6093230247497559, "learning_rate": 7.466666666666667e-08, "loss": 0.0687, "step": 7950 },
    { "epoch": 0.996875, "grad_norm": 1.730695366859436, "learning_rate": 4.133333333333334e-08, "loss": 0.0814, "step": 7975 },
    { "epoch": 1.0, "grad_norm": 3.418311595916748, "learning_rate": 8e-09, "loss": 0.0959, "step": 8000 },
    { "epoch": 1.0, "eval_loss": 0.1835634410381317, "eval_runtime": 154.4338, "eval_samples_per_second": 13.624, "eval_steps_per_second": 0.855, "eval_wer": 10.886229784051602, "step": 8000 },
    { "epoch": 1.0, "step": 8000, "total_flos": 7.387786248192e+19, "train_loss": 0.17036041705310345, "train_runtime": 11036.9074, "train_samples_per_second": 23.195, "train_steps_per_second": 0.725 }
  ],
  "logging_steps": 25,
  "max_steps": 8000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 1000,
  "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } },
  "total_flos": 7.387786248192e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
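The log history above is plain JSON, so the loss and WER trajectory of this run can be re-inspected offline. Below is a minimal sketch, assuming a local copy of `trainer_state.json` and an environment with `matplotlib` installed (neither ships with this commit); it separates the training-loss records from the evaluation records and plots both curves:

```python
import json

import matplotlib.pyplot as plt

# Path is an assumption: point this at the trainer_state.json from this repo.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training records carry "loss"; evaluation records carry "eval_wer".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_wer"]) for e in state["log_history"] if "eval_wer" in e]

fig, (ax_loss, ax_wer) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(*zip(*train))
ax_loss.set(xlabel="step", ylabel="training loss")
ax_wer.plot(*zip(*evals), marker="o")
ax_wer.set(xlabel="step", ylabel="eval WER (%)")
fig.tight_layout()
plt.show()
```

On this run both curves should end at the step-8000 evaluation recorded above (eval_loss 0.1836, WER 10.89).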
wandb/run-20250212_152709-lejyafmi/files/output.log
CHANGED
@@ -1612,3 +1612,171 @@ It seems you are trying to upload a large folder at once. This might take some t
/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
warnings.warn(
run-lejyafmi.wandb: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 4.62M/4.62M [00:01<00:00, 3.10MB/s]
|
1615 |
+
***** train metrics *****
|
1616 |
+
epoch = 1.0
|
1617 |
+
total_flos = 68804121093GF
|
1618 |
+
train_loss = 0.1704
|
1619 |
+
train_runtime = 3:03:56.90
|
1620 |
+
train_samples_per_second = 23.195
|
1621 |
+
train_steps_per_second = 0.725
|
1622 |
+
02/12/2025 18:34:14 - INFO - __main__ - *** Evaluate ***
|
1623 |
+
[INFO|trainer.py:4176] 2025-02-12 18:34:14,390 >>
|
1624 |
+
***** Running Evaluation *****
|
1625 |
+
[INFO|trainer.py:4180] 2025-02-12 18:34:14,390 >> Num examples: Unknown
|
1626 |
+
[INFO|trainer.py:4181] 2025-02-12 18:34:14,390 >> Batch size = 16
|
1627 |
+
[INFO|trainer_utils.py:837] 2025-02-12 18:34:21,770 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.
|
1628 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:21,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1629 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:23,093 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1630 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:24,451 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1631 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:25,719 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1632 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:26,825 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1633 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:28,007 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1634 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:29,124 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1635 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:30,269 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1636 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:31,477 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1637 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:32,588 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1638 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:33,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1639 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:34,837 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1640 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:35,809 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1641 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:36,700 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1642 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:37,736 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1643 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:38,634 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1644 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:39,507 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1645 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:40,537 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1646 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:41,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1647 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:42,380 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1648 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:43,354 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1649 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:44,341 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1650 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:45,314 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1651 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:46,305 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1652 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:47,281 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1653 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:48,258 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1654 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:49,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1655 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:50,458 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1656 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:51,370 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1657 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:52,358 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1658 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:53,396 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1659 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:54,416 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1660 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:55,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1661 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:56,518 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1662 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:57,596 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1663 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:58,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1664 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:34:59,614 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1665 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:00,681 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1666 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:01,717 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1667 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:03,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1668 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:04,967 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1669 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:05,993 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1670 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:06,930 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1671 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:07,915 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1672 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:08,880 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1673 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:09,968 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1674 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:10,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1675 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:11,921 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1676 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:13,064 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1677 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:14,074 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1678 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:15,109 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1679 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:16,148 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1680 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:17,091 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1681 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:18,110 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1682 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:19,129 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1683 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:20,248 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1684 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:21,190 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1685 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:22,160 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1686 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:23,208 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1687 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:24,153 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1688 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:25,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1689 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:26,238 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1690 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:27,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1691 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:28,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1692 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:29,260 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1693 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:30,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1694 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:31,206 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1695 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:32,239 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1696 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:33,287 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1697 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:34,297 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1698 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:35,293 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1699 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:36,401 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1700 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:37,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1701 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:38,517 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1702 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:39,530 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1703 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:40,569 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1704 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:41,532 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1705 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:42,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1706 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:43,550 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1707 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:44,509 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1708 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:45,496 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1709 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:46,461 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1710 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:47,479 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1711 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:48,535 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1712 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:49,591 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1713 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:50,593 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1714 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:51,616 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1715 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:52,742 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1716 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:53,784 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1717 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:54,932 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1718 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:55,969 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1719 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:56,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1720 |
+
[INFO|generation_whisper.py:1844] 2025-02-12 18:35:57,957 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
|
1721 |
+
+[INFO|generation_whisper.py:1844] 2025-02-12 18:35:58,926 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[... the same max_length message is logged once per evaluation batch, roughly every second ...]
+[INFO|generation_whisper.py:1844] 2025-02-12 18:36:37,437 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
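These INFO lines come from Whisper's long-form decoding path: when generation is conditioned on the previously decoded segment, the prompt tokens are prepended to the decoder input, so `max_length` is extended by the prompt length (here from 225 to 228). A minimal sketch of invoking that mode through `transformers`; the dummy `speech` array and the use of the base checkpoint are illustrative assumptions, not taken from this run:

    # Sketch: Whisper long-form generation conditioned on previous segments.
    import numpy as np
    import torch
    from transformers import WhisperForConditionalGeneration, WhisperProcessor

    processor = WhisperProcessor.from_pretrained("openai/whisper-small")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

    # Hypothetical input: 60 s of 16 kHz audio (longer than 30 s, so the
    # long-form, segment-by-segment decoding path is used).
    speech = np.zeros(16_000 * 60, dtype=np.float32)

    inputs = processor(
        speech,
        sampling_rate=16_000,
        return_tensors="pt",
        truncation=False,            # keep the full recording for long-form mode
        return_attention_mask=True,
    )

    with torch.no_grad():
        ids = model.generate(
            inputs.input_features,
            attention_mask=inputs.attention_mask,
            condition_on_prev_tokens=True,  # prepend the previous segment as a prompt;
                                            # this is what triggers the max_length increase
            return_timestamps=True,         # timestamps are required for long-form decoding
        )
    print(processor.batch_decode(ids, skip_special_tokens=True))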
+***** eval metrics *****
+  epoch                   =        1.0
+  eval_loss               =     0.1836
+  eval_runtime            = 0:02:31.38
+  eval_samples_per_second =     13.899
+  eval_steps_per_second   =      0.872
+  eval_wer                =    10.8862
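`eval_wer` above is the word error rate expressed as a percentage. A quick sketch of the usual way such a figure is computed with the `evaluate` library; the Basque strings are invented for illustration:

    # Sketch: WER as a percentage, matching the eval_wer convention above.
    import evaluate

    wer_metric = evaluate.load("wer")
    predictions = ["kaixo mundua"]        # hypothetical model transcription
    references = ["kaixo mundu guztia"]   # hypothetical reference transcript
    wer = 100 * wer_metric.compute(predictions=predictions, references=references)
    print(f"WER: {wer:.4f}")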
+[INFO|trainer.py:3860] 2025-02-12 18:36:45,773 >> Saving model checkpoint to ./
+[INFO|configuration_utils.py:423] 2025-02-12 18:36:45,774 >> Configuration saved in ./config.json
+[INFO|configuration_utils.py:906] 2025-02-12 18:36:45,775 >> Configuration saved in ./generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-12 18:36:47,949 >> Model weights saved in ./model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-12 18:36:47,950 >> Feature extractor saved in ./preprocessor_config.json
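With config.json, generation_config.json, model.safetensors and preprocessor_config.json all written to the output directory, the checkpoint can be reloaded in place. A minimal sketch, assuming the tokenizer files were also saved to the same directory earlier in the run:

    # Sketch: reloading the checkpoint that was just saved to ./
    from transformers import WhisperForConditionalGeneration, WhisperProcessor

    model = WhisperForConditionalGeneration.from_pretrained("./")  # config.json + model.safetensors
    processor = WhisperProcessor.from_pretrained("./")             # preprocessor_config.json + tokenizer files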
+It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+02/12/2025 18:36:51 - WARNING - huggingface_hub.hf_api - It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3937: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
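The four UserWarnings show that pyarrow parquet test fixtures from the local .venv were swept into the model upload. A hedged sketch of the `upload_large_folder` route the first warning recommends, additionally excluding the virtualenv; the repo id is inferred from the local path and purely illustrative:

    # Sketch: upload the output folder in resumable chunks, skipping .venv
    # (the directory whose pyarrow test fixtures triggered the warnings above).
    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_large_folder(
        repo_id="xezpeleta/whisper-small-eu",   # illustrative repo id
        folder_path="/home/tknika/xezpeleta/whisper/whisper-small-eu",
        repo_type="model",
        ignore_patterns=[".venv/**"],           # keep the virtualenv out of the model repo
    )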
+run-lejyafmi.wandb: 100%|████████████████████████████████████████████████████████████| 4.69M/4.69M [00:01<00:00, 3.23MB/s]
wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:218ae98ab28234be327e4ea9293f7b5d13580cf3d80509614063d5a55716991b
+size 4685824
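The .wandb file itself lives in Git LFS, so the repository only stores the three-line pointer shown above (spec version, sha256 oid, byte size). A small sketch of parsing such a pointer, using the values from this diff:

    # Sketch: parsing the Git LFS pointer recorded for run-lejyafmi.wandb.
    pointer_text = """version https://git-lfs.github.com/spec/v1
    oid sha256:218ae98ab28234be327e4ea9293f7b5d13580cf3d80509614063d5a55716991b
    size 4685824"""

    fields = dict(line.strip().split(" ", 1) for line in pointer_text.splitlines())
    algo, digest = fields["oid"].split(":", 1)
    print(algo, digest, int(fields["size"]))  # -> sha256 218ae9... 4685824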