ashik1104 commited on
Commit
081c46b
·
verified ·
1 Parent(s): cd00962

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -401
trainer_state.json DELETED
@@ -1,401 +0,0 @@
1
- {
2
- "best_metric": 0.2302779592132956,
3
- "best_model_checkpoint": "Bengali_wav2vec2_bert_punct\\checkpoint-230000",
4
- "epoch": 4.0,
5
- "eval_steps": 10000,
6
- "global_step": 232060,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.17236921485822632,
13
- "grad_norm": 4.759167671203613,
14
- "learning_rate": 4.79499913629297e-05,
15
- "loss": 1.4067,
16
- "step": 10000
17
- },
18
- {
19
- "epoch": 0.17236921485822632,
20
- "eval_WER": 0.40140793961825966,
21
- "eval_loss": 0.3006719648838043,
22
- "eval_runtime": 76.2909,
23
- "eval_samples_per_second": 39.323,
24
- "eval_steps_per_second": 4.915,
25
- "step": 10000
26
- },
27
- {
28
- "epoch": 0.34473842971645263,
29
- "grad_norm": 3.7281529903411865,
30
- "learning_rate": 4.5791371566764555e-05,
31
- "loss": 1.0498,
32
- "step": 20000
33
- },
34
- {
35
- "epoch": 0.34473842971645263,
36
- "eval_WER": 0.35401698236446766,
37
- "eval_loss": 0.24728824198246002,
38
- "eval_runtime": 78.6228,
39
- "eval_samples_per_second": 38.157,
40
- "eval_steps_per_second": 4.77,
41
- "step": 20000
42
- },
43
- {
44
- "epoch": 0.517107644574679,
45
- "grad_norm": 3.5825536251068115,
46
- "learning_rate": 4.363253584384177e-05,
47
- "loss": 0.9196,
48
- "step": 30000
49
- },
50
- {
51
- "epoch": 0.517107644574679,
52
- "eval_WER": 0.3367080339647289,
53
- "eval_loss": 0.22603251039981842,
54
- "eval_runtime": 77.3702,
55
- "eval_samples_per_second": 38.775,
56
- "eval_steps_per_second": 4.847,
57
- "step": 30000
58
- },
59
- {
60
- "epoch": 0.6894768594329053,
61
- "grad_norm": 5.719975471496582,
62
- "learning_rate": 4.147434790119192e-05,
63
- "loss": 0.8347,
64
- "step": 40000
65
- },
66
- {
67
- "epoch": 0.6894768594329053,
68
- "eval_WER": 0.32026997605051166,
69
- "eval_loss": 0.1984129399061203,
70
- "eval_runtime": 76.0573,
71
- "eval_samples_per_second": 39.444,
72
- "eval_steps_per_second": 4.93,
73
- "step": 40000
74
- },
75
- {
76
- "epoch": 0.8618460742911316,
77
- "grad_norm": 12.707213401794434,
78
- "learning_rate": 3.931594403178442e-05,
79
- "loss": 0.7728,
80
- "step": 50000
81
- },
82
- {
83
- "epoch": 0.8618460742911316,
84
- "eval_WER": 0.30205385006168806,
85
- "eval_loss": 0.18471553921699524,
86
- "eval_runtime": 77.2641,
87
- "eval_samples_per_second": 38.828,
88
- "eval_steps_per_second": 4.853,
89
- "step": 50000
90
- },
91
- {
92
- "epoch": 1.034215289149358,
93
- "grad_norm": 2.484391689300537,
94
- "learning_rate": 3.715732423561928e-05,
95
- "loss": 0.7257,
96
- "step": 60000
97
- },
98
- {
99
- "epoch": 1.034215289149358,
100
- "eval_WER": 0.29552217142027726,
101
- "eval_loss": 0.17463411390781403,
102
- "eval_runtime": 77.8642,
103
- "eval_samples_per_second": 38.529,
104
- "eval_steps_per_second": 4.816,
105
- "step": 60000
106
- },
107
- {
108
- "epoch": 1.2065845040075842,
109
- "grad_norm": 2.728205680847168,
110
- "learning_rate": 3.4999136292969425e-05,
111
- "loss": 0.6895,
112
- "step": 70000
113
- },
114
- {
115
- "epoch": 1.2065845040075842,
116
- "eval_WER": 0.2906596995427825,
117
- "eval_loss": 0.16489063203334808,
118
- "eval_runtime": 85.1263,
119
- "eval_samples_per_second": 35.242,
120
- "eval_steps_per_second": 4.405,
121
- "step": 70000
122
- },
123
- {
124
- "epoch": 1.3789537188658105,
125
- "grad_norm": 3.63918399810791,
126
- "learning_rate": 3.284030057004664e-05,
127
- "loss": 0.6653,
128
- "step": 80000
129
- },
130
- {
131
- "epoch": 1.3789537188658105,
132
- "eval_WER": 0.28917192829668337,
133
- "eval_loss": 0.16142985224723816,
134
- "eval_runtime": 77.5124,
135
- "eval_samples_per_second": 38.704,
136
- "eval_steps_per_second": 4.838,
137
- "step": 80000
138
- },
139
- {
140
- "epoch": 1.5513229337240368,
141
- "grad_norm": 3.3453214168548584,
142
- "learning_rate": 3.06816807738815e-05,
143
- "loss": 0.6418,
144
- "step": 90000
145
- },
146
- {
147
- "epoch": 1.5513229337240368,
148
- "eval_WER": 0.2765440162566224,
149
- "eval_loss": 0.1598576009273529,
150
- "eval_runtime": 77.4372,
151
- "eval_samples_per_second": 38.741,
152
- "eval_steps_per_second": 4.843,
153
- "step": 90000
154
- },
155
- {
156
- "epoch": 1.7236921485822632,
157
- "grad_norm": 3.1832072734832764,
158
- "learning_rate": 2.8523492831231646e-05,
159
- "loss": 0.6241,
160
- "step": 100000
161
- },
162
- {
163
- "epoch": 1.7236921485822632,
164
- "eval_WER": 0.2766165904637492,
165
- "eval_loss": 0.14717546105384827,
166
- "eval_runtime": 77.4687,
167
- "eval_samples_per_second": 38.725,
168
- "eval_steps_per_second": 4.841,
169
- "step": 100000
170
- },
171
- {
172
- "epoch": 1.8960613634404895,
173
- "grad_norm": 4.113869667053223,
174
- "learning_rate": 2.6364873035066505e-05,
175
- "loss": 0.6092,
176
- "step": 110000
177
- },
178
- {
179
- "epoch": 1.8960613634404895,
180
- "eval_WER": 0.26765367588359096,
181
- "eval_loss": 0.1441964954137802,
182
- "eval_runtime": 77.6139,
183
- "eval_samples_per_second": 38.653,
184
- "eval_steps_per_second": 4.832,
185
- "step": 110000
186
- },
187
- {
188
- "epoch": 2.068430578298716,
189
- "grad_norm": 1.85727858543396,
190
- "learning_rate": 2.4206253238901367e-05,
191
- "loss": 0.5805,
192
- "step": 120000
193
- },
194
- {
195
- "epoch": 2.068430578298716,
196
- "eval_WER": 0.26322664924885697,
197
- "eval_loss": 0.13760210573673248,
198
- "eval_runtime": 77.692,
199
- "eval_samples_per_second": 38.614,
200
- "eval_steps_per_second": 4.827,
201
- "step": 120000
202
- },
203
- {
204
- "epoch": 2.2407997931569423,
205
- "grad_norm": 2.774275779724121,
206
- "learning_rate": 2.204741751597858e-05,
207
- "loss": 0.5611,
208
- "step": 130000
209
- },
210
- {
211
- "epoch": 2.2407997931569423,
212
- "eval_WER": 0.2587633355105595,
213
- "eval_loss": 0.1326003223657608,
214
- "eval_runtime": 75.6029,
215
- "eval_samples_per_second": 39.681,
216
- "eval_steps_per_second": 4.96,
217
- "step": 130000
218
- },
219
- {
220
- "epoch": 2.4131690080151684,
221
- "grad_norm": 1.8835299015045166,
222
- "learning_rate": 1.9889013646571085e-05,
223
- "loss": 0.5433,
224
- "step": 140000
225
- },
226
- {
227
- "epoch": 2.4131690080151684,
228
- "eval_WER": 0.2550983380506568,
229
- "eval_loss": 0.12959806621074677,
230
- "eval_runtime": 75.9556,
231
- "eval_samples_per_second": 39.497,
232
- "eval_steps_per_second": 4.937,
233
- "step": 140000
234
- },
235
- {
236
- "epoch": 2.585538222873395,
237
- "grad_norm": 2.7739031314849854,
238
- "learning_rate": 1.773060977716359e-05,
239
- "loss": 0.5315,
240
- "step": 150000
241
- },
242
- {
243
- "epoch": 2.585538222873395,
244
- "eval_WER": 0.2523405181798389,
245
- "eval_loss": 0.12333784997463226,
246
- "eval_runtime": 75.6489,
247
- "eval_samples_per_second": 39.657,
248
- "eval_steps_per_second": 4.957,
249
- "step": 150000
250
- },
251
- {
252
- "epoch": 2.757907437731621,
253
- "grad_norm": 5.59724235534668,
254
- "learning_rate": 1.5572421834513734e-05,
255
- "loss": 0.5192,
256
- "step": 160000
257
- },
258
- {
259
- "epoch": 2.757907437731621,
260
- "eval_WER": 0.2465708687132593,
261
- "eval_loss": 0.11873666197061539,
262
- "eval_runtime": 75.813,
263
- "eval_samples_per_second": 39.571,
264
- "eval_steps_per_second": 4.946,
265
- "step": 160000
266
- },
267
- {
268
- "epoch": 2.9302766525898476,
269
- "grad_norm": 1.7511285543441772,
270
- "learning_rate": 1.3413802038348594e-05,
271
- "loss": 0.508,
272
- "step": 170000
273
- },
274
- {
275
- "epoch": 2.9302766525898476,
276
- "eval_WER": 0.24355903911749763,
277
- "eval_loss": 0.11491911858320236,
278
- "eval_runtime": 76.2555,
279
- "eval_samples_per_second": 39.341,
280
- "eval_steps_per_second": 4.918,
281
- "step": 170000
282
- },
283
- {
284
- "epoch": 3.1026458674480737,
285
- "grad_norm": 1.3240278959274292,
286
- "learning_rate": 1.1255830022456384e-05,
287
- "loss": 0.4885,
288
- "step": 180000
289
- },
290
- {
291
- "epoch": 3.1026458674480737,
292
- "eval_WER": 0.24315988097830032,
293
- "eval_loss": 0.11166874319314957,
294
- "eval_runtime": 77.1138,
295
- "eval_samples_per_second": 38.904,
296
- "eval_steps_per_second": 4.863,
297
- "step": 180000
298
- },
299
- {
300
- "epoch": 3.2750150823063002,
301
- "grad_norm": 1.0281517505645752,
302
- "learning_rate": 9.096994299533598e-06,
303
- "loss": 0.4709,
304
- "step": 190000
305
- },
306
- {
307
- "epoch": 3.2750150823063002,
308
- "eval_WER": 0.23887800275781987,
309
- "eval_loss": 0.10879674553871155,
310
- "eval_runtime": 77.1588,
311
- "eval_samples_per_second": 38.881,
312
- "eval_steps_per_second": 4.86,
313
- "step": 190000
314
- },
315
- {
316
- "epoch": 3.4473842971645263,
317
- "grad_norm": 2.2434046268463135,
318
- "learning_rate": 6.938590430126101e-06,
319
- "loss": 0.4669,
320
- "step": 200000
321
- },
322
- {
323
- "epoch": 3.4473842971645263,
324
- "eval_WER": 0.23492270846940996,
325
- "eval_loss": 0.10604960471391678,
326
- "eval_runtime": 77.0841,
327
- "eval_samples_per_second": 38.919,
328
- "eval_steps_per_second": 4.865,
329
- "step": 200000
330
- },
331
- {
332
- "epoch": 3.619753512022753,
333
- "grad_norm": 0.7335111498832703,
334
- "learning_rate": 4.780186560718605e-06,
335
- "loss": 0.451,
336
- "step": 210000
337
- },
338
- {
339
- "epoch": 3.619753512022753,
340
- "eval_WER": 0.23452355033021263,
341
- "eval_loss": 0.10134341567754745,
342
- "eval_runtime": 77.1143,
343
- "eval_samples_per_second": 38.903,
344
- "eval_steps_per_second": 4.863,
345
- "step": 210000
346
- },
347
- {
348
- "epoch": 3.792122726880979,
349
- "grad_norm": 1.7208735942840576,
350
- "learning_rate": 2.6215667645534633e-06,
351
- "loss": 0.445,
352
- "step": 220000
353
- },
354
- {
355
- "epoch": 3.792122726880979,
356
- "eval_WER": 0.2329632048769867,
357
- "eval_loss": 0.10342755168676376,
358
- "eval_runtime": 77.7151,
359
- "eval_samples_per_second": 38.603,
360
- "eval_steps_per_second": 4.825,
361
- "step": 220000
362
- },
363
- {
364
- "epoch": 3.9644919417392055,
365
- "grad_norm": 1.0953819751739502,
366
- "learning_rate": 4.6316289514596653e-07,
367
- "loss": 0.4379,
368
- "step": 230000
369
- },
370
- {
371
- "epoch": 3.9644919417392055,
372
- "eval_WER": 0.2302779592132956,
373
- "eval_loss": 0.09944533556699753,
374
- "eval_runtime": 78.1453,
375
- "eval_samples_per_second": 38.39,
376
- "eval_steps_per_second": 4.799,
377
- "step": 230000
378
- }
379
- ],
380
- "logging_steps": 10000,
381
- "max_steps": 232060,
382
- "num_input_tokens_seen": 0,
383
- "num_train_epochs": 4,
384
- "save_steps": 10000,
385
- "stateful_callbacks": {
386
- "TrainerControl": {
387
- "args": {
388
- "should_epoch_stop": false,
389
- "should_evaluate": false,
390
- "should_log": false,
391
- "should_save": true,
392
- "should_training_stop": true
393
- },
394
- "attributes": {}
395
- }
396
- },
397
- "total_flos": 4.7334394582539475e+20,
398
- "train_batch_size": 8,
399
- "trial_name": null,
400
- "trial_params": null
401
- }