desarrolloasesoreslocales commited on
Commit
bbd6df4
·
verified ·
1 Parent(s): 283544f

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_accuracy": 0.8367346938775511,
4
- "eval_loss": 0.4518675208091736,
5
- "eval_runtime": 1.1751,
6
- "eval_samples_per_second": 83.398,
7
- "eval_steps_per_second": 0.851,
8
  "total_flos": 2.86484619552768e+17,
9
- "train_loss": 0.49483771085739137,
10
- "train_runtime": 1046.6976,
11
- "train_samples_per_second": 75.571,
12
  "train_steps_per_second": 0.096
13
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_accuracy": 0.8877551020408163,
4
+ "eval_loss": 0.38076311349868774,
5
+ "eval_runtime": 1.1891,
6
+ "eval_samples_per_second": 82.412,
7
+ "eval_steps_per_second": 0.841,
8
  "total_flos": 2.86484619552768e+17,
9
+ "train_loss": 0.35724998712539674,
10
+ "train_runtime": 1042.9816,
11
+ "train_samples_per_second": 75.84,
12
  "train_steps_per_second": 0.096
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_accuracy": 0.8367346938775511,
4
- "eval_loss": 0.4518675208091736,
5
- "eval_runtime": 1.1751,
6
- "eval_samples_per_second": 83.398,
7
- "eval_steps_per_second": 0.851
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_accuracy": 0.8877551020408163,
4
+ "eval_loss": 0.38076311349868774,
5
+ "eval_runtime": 1.1891,
6
+ "eval_samples_per_second": 82.412,
7
+ "eval_steps_per_second": 0.841
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d70e8f3b7a2a106e31c55f34851a33cfcbcf904df417754854a8ba6f047e2c5
3
  size 16255128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d46445efae95f5bcd69a93fdd5cb3af95bdbc510cc4ac205d925eecd40fbe73
3
  size 16255128
runs/Apr25_08-57-27_947bdbf0ec8c/events.out.tfevents.1745576741.947bdbf0ec8c.628.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0efc39fd36b364578ffd9be26b3c5ff6e068b2a51603b0b63e31621a4938f184
3
+ size 405
runs/Apr25_10-26-59_947bdbf0ec8c/events.out.tfevents.1745576834.947bdbf0ec8c.628.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:257f78de9262e2cc05418596cc8b2d18da8925e41071c2d9d1c6858d7d9e209c
3
+ size 6031
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
  "total_flos": 2.86484619552768e+17,
4
- "train_loss": 0.49483771085739137,
5
- "train_runtime": 1046.6976,
6
- "train_samples_per_second": 75.571,
7
  "train_steps_per_second": 0.096
8
  }
 
1
  {
2
  "epoch": 100.0,
3
  "total_flos": 2.86484619552768e+17,
4
+ "train_loss": 0.35724998712539674,
5
+ "train_runtime": 1042.9816,
6
+ "train_samples_per_second": 75.84,
7
  "train_steps_per_second": 0.096
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 49,
3
- "best_metric": 0.8367346938775511,
4
- "best_model_checkpoint": "efficientnet-b0-accidents/checkpoint-49",
5
  "epoch": 100.0,
6
  "eval_steps": 500,
7
  "global_step": 100,
@@ -11,981 +11,981 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.4897959183673469,
15
- "eval_loss": 0.7069449424743652,
16
- "eval_runtime": 1.2343,
17
- "eval_samples_per_second": 79.397,
18
- "eval_steps_per_second": 0.81,
19
  "step": 1
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4897959183673469,
24
- "eval_loss": 0.7112836837768555,
25
- "eval_runtime": 1.1578,
26
- "eval_samples_per_second": 84.646,
27
- "eval_steps_per_second": 0.864,
28
  "step": 2
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.5102040816326531,
33
- "eval_loss": 0.7033699750900269,
34
- "eval_runtime": 1.18,
35
- "eval_samples_per_second": 83.053,
36
- "eval_steps_per_second": 0.847,
37
  "step": 3
38
  },
39
  {
40
  "epoch": 4.0,
41
- "eval_accuracy": 0.5408163265306123,
42
- "eval_loss": 0.6949645280838013,
43
- "eval_runtime": 1.1744,
44
- "eval_samples_per_second": 83.45,
45
- "eval_steps_per_second": 0.852,
46
  "step": 4
47
  },
48
  {
49
  "epoch": 5.0,
50
- "eval_accuracy": 0.5612244897959183,
51
- "eval_loss": 0.694236695766449,
52
- "eval_runtime": 1.1848,
53
- "eval_samples_per_second": 82.713,
54
- "eval_steps_per_second": 0.844,
55
  "step": 5
56
  },
57
  {
58
  "epoch": 6.0,
59
- "eval_accuracy": 0.5306122448979592,
60
- "eval_loss": 0.6846552491188049,
61
- "eval_runtime": 1.159,
62
- "eval_samples_per_second": 84.554,
63
- "eval_steps_per_second": 0.863,
64
  "step": 6
65
  },
66
  {
67
  "epoch": 7.0,
68
- "eval_accuracy": 0.5204081632653061,
69
- "eval_loss": 0.6865835189819336,
70
- "eval_runtime": 1.1556,
71
- "eval_samples_per_second": 84.803,
72
- "eval_steps_per_second": 0.865,
73
  "step": 7
74
  },
75
  {
76
  "epoch": 8.0,
77
- "eval_accuracy": 0.5816326530612245,
78
- "eval_loss": 0.6743000745773315,
79
- "eval_runtime": 1.176,
80
- "eval_samples_per_second": 83.332,
81
- "eval_steps_per_second": 0.85,
82
  "step": 8
83
  },
84
  {
85
  "epoch": 9.0,
86
- "eval_accuracy": 0.5714285714285714,
87
- "eval_loss": 0.6716513633728027,
88
- "eval_runtime": 1.1533,
89
- "eval_samples_per_second": 84.974,
90
- "eval_steps_per_second": 0.867,
91
  "step": 9
92
  },
93
  {
94
  "epoch": 10.0,
95
- "grad_norm": 0.6979305148124695,
96
  "learning_rate": 9e-05,
97
- "loss": 0.6848,
98
  "step": 10
99
  },
100
  {
101
  "epoch": 10.0,
102
- "eval_accuracy": 0.6428571428571429,
103
- "eval_loss": 0.6547528505325317,
104
- "eval_runtime": 1.1525,
105
- "eval_samples_per_second": 85.03,
106
- "eval_steps_per_second": 0.868,
107
  "step": 10
108
  },
109
  {
110
  "epoch": 11.0,
111
- "eval_accuracy": 0.6020408163265306,
112
- "eval_loss": 0.6551060676574707,
113
- "eval_runtime": 1.1814,
114
- "eval_samples_per_second": 82.95,
115
- "eval_steps_per_second": 0.846,
116
  "step": 11
117
  },
118
  {
119
  "epoch": 12.0,
120
- "eval_accuracy": 0.5918367346938775,
121
- "eval_loss": 0.6481595039367676,
122
- "eval_runtime": 1.1945,
123
- "eval_samples_per_second": 82.042,
124
- "eval_steps_per_second": 0.837,
125
  "step": 12
126
  },
127
  {
128
  "epoch": 13.0,
129
- "eval_accuracy": 0.6632653061224489,
130
- "eval_loss": 0.6407487988471985,
131
- "eval_runtime": 1.2052,
132
- "eval_samples_per_second": 81.317,
133
- "eval_steps_per_second": 0.83,
134
  "step": 13
135
  },
136
  {
137
  "epoch": 14.0,
138
- "eval_accuracy": 0.7040816326530612,
139
- "eval_loss": 0.6281946301460266,
140
- "eval_runtime": 1.1797,
141
- "eval_samples_per_second": 83.069,
142
- "eval_steps_per_second": 0.848,
143
  "step": 14
144
  },
145
  {
146
  "epoch": 15.0,
147
- "eval_accuracy": 0.6836734693877551,
148
- "eval_loss": 0.6241097450256348,
149
- "eval_runtime": 1.1429,
150
- "eval_samples_per_second": 85.746,
151
- "eval_steps_per_second": 0.875,
152
  "step": 15
153
  },
154
  {
155
  "epoch": 16.0,
156
- "eval_accuracy": 0.7142857142857143,
157
- "eval_loss": 0.6189384460449219,
158
- "eval_runtime": 1.1668,
159
- "eval_samples_per_second": 83.988,
160
- "eval_steps_per_second": 0.857,
161
  "step": 16
162
  },
163
  {
164
  "epoch": 17.0,
165
- "eval_accuracy": 0.6836734693877551,
166
- "eval_loss": 0.5998439192771912,
167
- "eval_runtime": 1.1666,
168
- "eval_samples_per_second": 84.007,
169
- "eval_steps_per_second": 0.857,
170
  "step": 17
171
  },
172
  {
173
  "epoch": 18.0,
174
- "eval_accuracy": 0.6632653061224489,
175
- "eval_loss": 0.609135627746582,
176
- "eval_runtime": 1.2178,
177
- "eval_samples_per_second": 80.476,
178
- "eval_steps_per_second": 0.821,
179
  "step": 18
180
  },
181
  {
182
  "epoch": 19.0,
183
- "eval_accuracy": 0.7142857142857143,
184
- "eval_loss": 0.5961523056030273,
185
- "eval_runtime": 1.1716,
186
- "eval_samples_per_second": 83.647,
187
- "eval_steps_per_second": 0.854,
188
  "step": 19
189
  },
190
  {
191
  "epoch": 20.0,
192
- "grad_norm": 0.5801095366477966,
193
  "learning_rate": 9e-05,
194
- "loss": 0.6177,
195
  "step": 20
196
  },
197
  {
198
  "epoch": 20.0,
199
- "eval_accuracy": 0.6938775510204082,
200
- "eval_loss": 0.5860151648521423,
201
- "eval_runtime": 1.162,
202
- "eval_samples_per_second": 84.34,
203
- "eval_steps_per_second": 0.861,
204
  "step": 20
205
  },
206
  {
207
  "epoch": 21.0,
208
- "eval_accuracy": 0.6836734693877551,
209
- "eval_loss": 0.6065893173217773,
210
- "eval_runtime": 1.156,
211
- "eval_samples_per_second": 84.777,
212
- "eval_steps_per_second": 0.865,
213
  "step": 21
214
  },
215
  {
216
  "epoch": 22.0,
217
- "eval_accuracy": 0.7448979591836735,
218
- "eval_loss": 0.5802229642868042,
219
- "eval_runtime": 1.1717,
220
- "eval_samples_per_second": 83.64,
221
- "eval_steps_per_second": 0.853,
222
  "step": 22
223
  },
224
  {
225
  "epoch": 23.0,
226
- "eval_accuracy": 0.7448979591836735,
227
- "eval_loss": 0.5803083777427673,
228
- "eval_runtime": 1.1718,
229
- "eval_samples_per_second": 83.634,
230
- "eval_steps_per_second": 0.853,
231
  "step": 23
232
  },
233
  {
234
  "epoch": 24.0,
235
- "eval_accuracy": 0.7551020408163265,
236
- "eval_loss": 0.5717583894729614,
237
- "eval_runtime": 1.1582,
238
- "eval_samples_per_second": 84.613,
239
- "eval_steps_per_second": 0.863,
240
  "step": 24
241
  },
242
  {
243
  "epoch": 25.0,
244
- "eval_accuracy": 0.7040816326530612,
245
- "eval_loss": 0.5688303112983704,
246
- "eval_runtime": 1.169,
247
- "eval_samples_per_second": 83.83,
248
- "eval_steps_per_second": 0.855,
249
  "step": 25
250
  },
251
  {
252
  "epoch": 26.0,
253
- "eval_accuracy": 0.7244897959183674,
254
- "eval_loss": 0.558906614780426,
255
- "eval_runtime": 1.1606,
256
- "eval_samples_per_second": 84.442,
257
- "eval_steps_per_second": 0.862,
258
  "step": 26
259
  },
260
  {
261
  "epoch": 27.0,
262
- "eval_accuracy": 0.7244897959183674,
263
- "eval_loss": 0.5467594861984253,
264
- "eval_runtime": 1.228,
265
- "eval_samples_per_second": 79.805,
266
- "eval_steps_per_second": 0.814,
267
  "step": 27
268
  },
269
  {
270
  "epoch": 28.0,
271
- "eval_accuracy": 0.6938775510204082,
272
- "eval_loss": 0.5388599634170532,
273
- "eval_runtime": 1.1813,
274
- "eval_samples_per_second": 82.963,
275
- "eval_steps_per_second": 0.847,
276
  "step": 28
277
  },
278
  {
279
  "epoch": 29.0,
280
- "eval_accuracy": 0.7551020408163265,
281
- "eval_loss": 0.5336747169494629,
282
- "eval_runtime": 1.1683,
283
- "eval_samples_per_second": 83.881,
284
- "eval_steps_per_second": 0.856,
285
  "step": 29
286
  },
287
  {
288
  "epoch": 30.0,
289
- "grad_norm": 0.5761419534683228,
290
  "learning_rate": 7.88888888888889e-05,
291
- "loss": 0.5545,
292
  "step": 30
293
  },
294
  {
295
  "epoch": 30.0,
296
- "eval_accuracy": 0.7346938775510204,
297
- "eval_loss": 0.528902530670166,
298
- "eval_runtime": 1.1716,
299
- "eval_samples_per_second": 83.645,
300
- "eval_steps_per_second": 0.854,
301
  "step": 30
302
  },
303
  {
304
  "epoch": 31.0,
305
- "eval_accuracy": 0.7448979591836735,
306
- "eval_loss": 0.5381345748901367,
307
- "eval_runtime": 1.1607,
308
- "eval_samples_per_second": 84.429,
309
- "eval_steps_per_second": 0.862,
310
  "step": 31
311
  },
312
  {
313
  "epoch": 32.0,
314
- "eval_accuracy": 0.7857142857142857,
315
- "eval_loss": 0.5213983654975891,
316
- "eval_runtime": 1.1897,
317
- "eval_samples_per_second": 82.372,
318
- "eval_steps_per_second": 0.841,
319
  "step": 32
320
  },
321
  {
322
  "epoch": 33.0,
323
- "eval_accuracy": 0.7346938775510204,
324
- "eval_loss": 0.5152321457862854,
325
- "eval_runtime": 1.159,
326
- "eval_samples_per_second": 84.555,
327
- "eval_steps_per_second": 0.863,
328
  "step": 33
329
  },
330
  {
331
  "epoch": 34.0,
332
- "eval_accuracy": 0.7040816326530612,
333
- "eval_loss": 0.509580671787262,
334
- "eval_runtime": 1.1649,
335
- "eval_samples_per_second": 84.127,
336
- "eval_steps_per_second": 0.858,
337
  "step": 34
338
  },
339
  {
340
  "epoch": 35.0,
341
- "eval_accuracy": 0.6836734693877551,
342
- "eval_loss": 0.5266232490539551,
343
- "eval_runtime": 1.1617,
344
- "eval_samples_per_second": 84.359,
345
- "eval_steps_per_second": 0.861,
346
  "step": 35
347
  },
348
  {
349
  "epoch": 36.0,
350
- "eval_accuracy": 0.7959183673469388,
351
- "eval_loss": 0.5110865831375122,
352
- "eval_runtime": 1.1708,
353
- "eval_samples_per_second": 83.701,
354
- "eval_steps_per_second": 0.854,
355
  "step": 36
356
  },
357
  {
358
  "epoch": 37.0,
359
- "eval_accuracy": 0.7551020408163265,
360
- "eval_loss": 0.49542251229286194,
361
- "eval_runtime": 1.1679,
362
- "eval_samples_per_second": 83.913,
363
- "eval_steps_per_second": 0.856,
364
  "step": 37
365
  },
366
  {
367
  "epoch": 38.0,
368
- "eval_accuracy": 0.7551020408163265,
369
- "eval_loss": 0.510901927947998,
370
- "eval_runtime": 1.1533,
371
- "eval_samples_per_second": 84.976,
372
- "eval_steps_per_second": 0.867,
373
  "step": 38
374
  },
375
  {
376
  "epoch": 39.0,
377
- "eval_accuracy": 0.7653061224489796,
378
- "eval_loss": 0.4929142892360687,
379
- "eval_runtime": 1.1524,
380
- "eval_samples_per_second": 85.037,
381
- "eval_steps_per_second": 0.868,
382
  "step": 39
383
  },
384
  {
385
  "epoch": 40.0,
386
- "grad_norm": 0.5709299445152283,
387
  "learning_rate": 6.777777777777778e-05,
388
- "loss": 0.5215,
389
  "step": 40
390
  },
391
  {
392
  "epoch": 40.0,
393
- "eval_accuracy": 0.7857142857142857,
394
- "eval_loss": 0.4865826666355133,
395
- "eval_runtime": 1.1621,
396
- "eval_samples_per_second": 84.328,
397
- "eval_steps_per_second": 0.86,
398
  "step": 40
399
  },
400
  {
401
  "epoch": 41.0,
402
- "eval_accuracy": 0.7244897959183674,
403
- "eval_loss": 0.4810398519039154,
404
- "eval_runtime": 1.1724,
405
- "eval_samples_per_second": 83.587,
406
- "eval_steps_per_second": 0.853,
407
  "step": 41
408
  },
409
  {
410
  "epoch": 42.0,
411
- "eval_accuracy": 0.7755102040816326,
412
- "eval_loss": 0.48122912645339966,
413
- "eval_runtime": 1.1575,
414
- "eval_samples_per_second": 84.664,
415
- "eval_steps_per_second": 0.864,
416
  "step": 42
417
  },
418
  {
419
  "epoch": 43.0,
420
- "eval_accuracy": 0.7857142857142857,
421
- "eval_loss": 0.4918031096458435,
422
- "eval_runtime": 1.1694,
423
- "eval_samples_per_second": 83.803,
424
- "eval_steps_per_second": 0.855,
425
  "step": 43
426
  },
427
  {
428
  "epoch": 44.0,
429
- "eval_accuracy": 0.7857142857142857,
430
- "eval_loss": 0.4652838110923767,
431
- "eval_runtime": 1.1691,
432
- "eval_samples_per_second": 83.824,
433
- "eval_steps_per_second": 0.855,
434
  "step": 44
435
  },
436
  {
437
  "epoch": 45.0,
438
- "eval_accuracy": 0.7346938775510204,
439
- "eval_loss": 0.4883042871952057,
440
- "eval_runtime": 1.1599,
441
- "eval_samples_per_second": 84.491,
442
- "eval_steps_per_second": 0.862,
443
  "step": 45
444
  },
445
  {
446
  "epoch": 46.0,
447
- "eval_accuracy": 0.7959183673469388,
448
- "eval_loss": 0.47950759530067444,
449
- "eval_runtime": 1.1792,
450
- "eval_samples_per_second": 83.11,
451
- "eval_steps_per_second": 0.848,
452
  "step": 46
453
  },
454
  {
455
  "epoch": 47.0,
456
- "eval_accuracy": 0.7551020408163265,
457
- "eval_loss": 0.5048059225082397,
458
- "eval_runtime": 1.1607,
459
- "eval_samples_per_second": 84.435,
460
- "eval_steps_per_second": 0.862,
461
  "step": 47
462
  },
463
  {
464
  "epoch": 48.0,
465
- "eval_accuracy": 0.7755102040816326,
466
- "eval_loss": 0.4864862859249115,
467
- "eval_runtime": 1.1696,
468
- "eval_samples_per_second": 83.792,
469
- "eval_steps_per_second": 0.855,
470
  "step": 48
471
  },
472
  {
473
  "epoch": 49.0,
474
- "eval_accuracy": 0.8367346938775511,
475
- "eval_loss": 0.4518675208091736,
476
- "eval_runtime": 1.2497,
477
- "eval_samples_per_second": 78.419,
478
- "eval_steps_per_second": 0.8,
479
  "step": 49
480
  },
481
  {
482
  "epoch": 50.0,
483
- "grad_norm": 0.6331581473350525,
484
  "learning_rate": 5.666666666666667e-05,
485
- "loss": 0.4859,
486
  "step": 50
487
  },
488
  {
489
  "epoch": 50.0,
490
- "eval_accuracy": 0.7551020408163265,
491
- "eval_loss": 0.47076812386512756,
492
- "eval_runtime": 1.1678,
493
- "eval_samples_per_second": 83.921,
494
- "eval_steps_per_second": 0.856,
495
  "step": 50
496
  },
497
  {
498
  "epoch": 51.0,
499
- "eval_accuracy": 0.8367346938775511,
500
- "eval_loss": 0.44281908869743347,
501
- "eval_runtime": 1.1535,
502
- "eval_samples_per_second": 84.962,
503
- "eval_steps_per_second": 0.867,
504
  "step": 51
505
  },
506
  {
507
  "epoch": 52.0,
508
- "eval_accuracy": 0.8163265306122449,
509
- "eval_loss": 0.45022523403167725,
510
- "eval_runtime": 1.1569,
511
- "eval_samples_per_second": 84.711,
512
- "eval_steps_per_second": 0.864,
513
  "step": 52
514
  },
515
  {
516
  "epoch": 53.0,
517
- "eval_accuracy": 0.7857142857142857,
518
- "eval_loss": 0.4585103690624237,
519
- "eval_runtime": 1.1646,
520
- "eval_samples_per_second": 84.152,
521
- "eval_steps_per_second": 0.859,
522
  "step": 53
523
  },
524
  {
525
  "epoch": 54.0,
526
- "eval_accuracy": 0.8163265306122449,
527
- "eval_loss": 0.4413427710533142,
528
- "eval_runtime": 1.1647,
529
- "eval_samples_per_second": 84.14,
530
- "eval_steps_per_second": 0.859,
531
  "step": 54
532
  },
533
  {
534
  "epoch": 55.0,
535
- "eval_accuracy": 0.7857142857142857,
536
- "eval_loss": 0.4472661018371582,
537
- "eval_runtime": 1.1595,
538
- "eval_samples_per_second": 84.52,
539
- "eval_steps_per_second": 0.862,
540
  "step": 55
541
  },
542
  {
543
  "epoch": 56.0,
544
- "eval_accuracy": 0.7959183673469388,
545
- "eval_loss": 0.442340224981308,
546
- "eval_runtime": 1.2245,
547
- "eval_samples_per_second": 80.035,
548
- "eval_steps_per_second": 0.817,
549
  "step": 56
550
  },
551
  {
552
  "epoch": 57.0,
553
- "eval_accuracy": 0.8061224489795918,
554
- "eval_loss": 0.44107988476753235,
555
- "eval_runtime": 1.1697,
556
- "eval_samples_per_second": 83.784,
557
- "eval_steps_per_second": 0.855,
558
  "step": 57
559
  },
560
  {
561
  "epoch": 58.0,
562
- "eval_accuracy": 0.826530612244898,
563
- "eval_loss": 0.43629103899002075,
564
- "eval_runtime": 1.1536,
565
- "eval_samples_per_second": 84.955,
566
- "eval_steps_per_second": 0.867,
567
  "step": 58
568
  },
569
  {
570
  "epoch": 59.0,
571
- "eval_accuracy": 0.7857142857142857,
572
- "eval_loss": 0.4597272574901581,
573
- "eval_runtime": 1.1768,
574
- "eval_samples_per_second": 83.274,
575
- "eval_steps_per_second": 0.85,
576
  "step": 59
577
  },
578
  {
579
  "epoch": 60.0,
580
- "grad_norm": 0.4986366033554077,
581
  "learning_rate": 4.555555555555556e-05,
582
- "loss": 0.4398,
583
  "step": 60
584
  },
585
  {
586
  "epoch": 60.0,
587
- "eval_accuracy": 0.8061224489795918,
588
- "eval_loss": 0.4168866276741028,
589
- "eval_runtime": 1.161,
590
- "eval_samples_per_second": 84.413,
591
- "eval_steps_per_second": 0.861,
592
  "step": 60
593
  },
594
  {
595
  "epoch": 61.0,
596
- "eval_accuracy": 0.826530612244898,
597
- "eval_loss": 0.42812231183052063,
598
- "eval_runtime": 1.1619,
599
- "eval_samples_per_second": 84.343,
600
- "eval_steps_per_second": 0.861,
601
  "step": 61
602
  },
603
  {
604
  "epoch": 62.0,
605
- "eval_accuracy": 0.8061224489795918,
606
- "eval_loss": 0.41872650384902954,
607
- "eval_runtime": 1.1688,
608
- "eval_samples_per_second": 83.845,
609
- "eval_steps_per_second": 0.856,
610
  "step": 62
611
  },
612
  {
613
  "epoch": 63.0,
614
- "eval_accuracy": 0.8061224489795918,
615
- "eval_loss": 0.43582549691200256,
616
- "eval_runtime": 1.2068,
617
- "eval_samples_per_second": 81.204,
618
- "eval_steps_per_second": 0.829,
619
  "step": 63
620
  },
621
  {
622
  "epoch": 64.0,
623
- "eval_accuracy": 0.7653061224489796,
624
- "eval_loss": 0.4350709319114685,
625
- "eval_runtime": 1.2449,
626
- "eval_samples_per_second": 78.72,
627
- "eval_steps_per_second": 0.803,
628
  "step": 64
629
  },
630
  {
631
  "epoch": 65.0,
632
- "eval_accuracy": 0.7959183673469388,
633
- "eval_loss": 0.4329792857170105,
634
- "eval_runtime": 1.1677,
635
- "eval_samples_per_second": 83.927,
636
- "eval_steps_per_second": 0.856,
637
  "step": 65
638
  },
639
  {
640
  "epoch": 66.0,
641
- "eval_accuracy": 0.826530612244898,
642
- "eval_loss": 0.40659093856811523,
643
- "eval_runtime": 1.154,
644
- "eval_samples_per_second": 84.918,
645
- "eval_steps_per_second": 0.867,
646
  "step": 66
647
  },
648
  {
649
  "epoch": 67.0,
650
- "eval_accuracy": 0.8163265306122449,
651
- "eval_loss": 0.42853957414627075,
652
- "eval_runtime": 1.1685,
653
- "eval_samples_per_second": 83.868,
654
- "eval_steps_per_second": 0.856,
655
  "step": 67
656
  },
657
  {
658
  "epoch": 68.0,
659
- "eval_accuracy": 0.7653061224489796,
660
- "eval_loss": 0.4496270716190338,
661
- "eval_runtime": 1.1622,
662
- "eval_samples_per_second": 84.325,
663
- "eval_steps_per_second": 0.86,
664
  "step": 68
665
  },
666
  {
667
  "epoch": 69.0,
668
- "eval_accuracy": 0.826530612244898,
669
- "eval_loss": 0.39742252230644226,
670
- "eval_runtime": 1.1891,
671
- "eval_samples_per_second": 82.413,
672
- "eval_steps_per_second": 0.841,
673
  "step": 69
674
  },
675
  {
676
  "epoch": 70.0,
677
- "grad_norm": 0.5778128504753113,
678
  "learning_rate": 3.444444444444445e-05,
679
- "loss": 0.4268,
680
  "step": 70
681
  },
682
  {
683
  "epoch": 70.0,
684
- "eval_accuracy": 0.826530612244898,
685
- "eval_loss": 0.39837557077407837,
686
- "eval_runtime": 1.1889,
687
- "eval_samples_per_second": 82.431,
688
- "eval_steps_per_second": 0.841,
689
  "step": 70
690
  },
691
  {
692
  "epoch": 71.0,
693
- "eval_accuracy": 0.8061224489795918,
694
- "eval_loss": 0.4166240990161896,
695
- "eval_runtime": 1.1595,
696
- "eval_samples_per_second": 84.517,
697
- "eval_steps_per_second": 0.862,
698
  "step": 71
699
  },
700
  {
701
  "epoch": 72.0,
702
- "eval_accuracy": 0.8163265306122449,
703
- "eval_loss": 0.42053714394569397,
704
- "eval_runtime": 1.168,
705
- "eval_samples_per_second": 83.903,
706
- "eval_steps_per_second": 0.856,
707
  "step": 72
708
  },
709
  {
710
  "epoch": 73.0,
711
- "eval_accuracy": 0.7959183673469388,
712
- "eval_loss": 0.43899521231651306,
713
- "eval_runtime": 1.1608,
714
- "eval_samples_per_second": 84.422,
715
- "eval_steps_per_second": 0.861,
716
  "step": 73
717
  },
718
  {
719
  "epoch": 74.0,
720
- "eval_accuracy": 0.826530612244898,
721
- "eval_loss": 0.41976168751716614,
722
- "eval_runtime": 1.1663,
723
- "eval_samples_per_second": 84.028,
724
- "eval_steps_per_second": 0.857,
725
  "step": 74
726
  },
727
  {
728
  "epoch": 75.0,
729
- "eval_accuracy": 0.8367346938775511,
730
- "eval_loss": 0.3999468684196472,
731
- "eval_runtime": 1.1522,
732
- "eval_samples_per_second": 85.051,
733
- "eval_steps_per_second": 0.868,
734
  "step": 75
735
  },
736
  {
737
  "epoch": 76.0,
738
- "eval_accuracy": 0.7857142857142857,
739
- "eval_loss": 0.4234585762023926,
740
- "eval_runtime": 1.1526,
741
- "eval_samples_per_second": 85.022,
742
- "eval_steps_per_second": 0.868,
743
  "step": 76
744
  },
745
  {
746
  "epoch": 77.0,
747
- "eval_accuracy": 0.7755102040816326,
748
- "eval_loss": 0.4313722252845764,
749
- "eval_runtime": 1.15,
750
- "eval_samples_per_second": 85.219,
751
- "eval_steps_per_second": 0.87,
752
  "step": 77
753
  },
754
  {
755
  "epoch": 78.0,
756
  "eval_accuracy": 0.8367346938775511,
757
- "eval_loss": 0.4021131694316864,
758
- "eval_runtime": 1.1683,
759
- "eval_samples_per_second": 83.886,
760
- "eval_steps_per_second": 0.856,
761
  "step": 78
762
  },
763
  {
764
  "epoch": 79.0,
765
- "eval_accuracy": 0.8163265306122449,
766
- "eval_loss": 0.4189140200614929,
767
- "eval_runtime": 1.1723,
768
- "eval_samples_per_second": 83.594,
769
  "eval_steps_per_second": 0.853,
770
  "step": 79
771
  },
772
  {
773
  "epoch": 80.0,
774
- "grad_norm": 0.6524766087532043,
775
  "learning_rate": 2.3333333333333336e-05,
776
- "loss": 0.4185,
777
  "step": 80
778
  },
779
  {
780
  "epoch": 80.0,
781
- "eval_accuracy": 0.8163265306122449,
782
- "eval_loss": 0.41430193185806274,
783
- "eval_runtime": 1.161,
784
- "eval_samples_per_second": 84.413,
785
- "eval_steps_per_second": 0.861,
786
  "step": 80
787
  },
788
  {
789
  "epoch": 81.0,
790
- "eval_accuracy": 0.7959183673469388,
791
- "eval_loss": 0.43291813135147095,
792
- "eval_runtime": 1.1491,
793
- "eval_samples_per_second": 85.281,
794
- "eval_steps_per_second": 0.87,
795
  "step": 81
796
  },
797
  {
798
  "epoch": 82.0,
799
- "eval_accuracy": 0.8061224489795918,
800
- "eval_loss": 0.42231523990631104,
801
- "eval_runtime": 1.1676,
802
- "eval_samples_per_second": 83.931,
803
- "eval_steps_per_second": 0.856,
804
  "step": 82
805
  },
806
  {
807
  "epoch": 83.0,
808
  "eval_accuracy": 0.826530612244898,
809
- "eval_loss": 0.4024517238140106,
810
- "eval_runtime": 1.1532,
811
- "eval_samples_per_second": 84.981,
812
- "eval_steps_per_second": 0.867,
813
  "step": 83
814
  },
815
  {
816
  "epoch": 84.0,
817
- "eval_accuracy": 0.7857142857142857,
818
- "eval_loss": 0.42878466844558716,
819
- "eval_runtime": 1.1683,
820
- "eval_samples_per_second": 83.884,
821
- "eval_steps_per_second": 0.856,
822
  "step": 84
823
  },
824
  {
825
  "epoch": 85.0,
826
- "eval_accuracy": 0.8163265306122449,
827
- "eval_loss": 0.4215553402900696,
828
- "eval_runtime": 1.1692,
829
- "eval_samples_per_second": 83.817,
830
- "eval_steps_per_second": 0.855,
831
  "step": 85
832
  },
833
  {
834
  "epoch": 86.0,
835
- "eval_accuracy": 0.826530612244898,
836
- "eval_loss": 0.41396430134773254,
837
- "eval_runtime": 1.1657,
838
- "eval_samples_per_second": 84.066,
839
- "eval_steps_per_second": 0.858,
840
  "step": 86
841
  },
842
  {
843
  "epoch": 87.0,
844
- "eval_accuracy": 0.8367346938775511,
845
- "eval_loss": 0.3982178866863251,
846
- "eval_runtime": 1.1612,
847
- "eval_samples_per_second": 84.394,
848
- "eval_steps_per_second": 0.861,
849
  "step": 87
850
  },
851
  {
852
  "epoch": 88.0,
853
- "eval_accuracy": 0.8163265306122449,
854
- "eval_loss": 0.40849459171295166,
855
- "eval_runtime": 1.1499,
856
- "eval_samples_per_second": 85.227,
857
- "eval_steps_per_second": 0.87,
858
  "step": 88
859
  },
860
  {
861
  "epoch": 89.0,
862
- "eval_accuracy": 0.8163265306122449,
863
- "eval_loss": 0.42925825715065,
864
- "eval_runtime": 1.1765,
865
- "eval_samples_per_second": 83.298,
866
- "eval_steps_per_second": 0.85,
867
  "step": 89
868
  },
869
  {
870
  "epoch": 90.0,
871
- "grad_norm": 0.4931574761867523,
872
  "learning_rate": 1.2222222222222222e-05,
873
- "loss": 0.4034,
874
  "step": 90
875
  },
876
  {
877
  "epoch": 90.0,
878
- "eval_accuracy": 0.826530612244898,
879
- "eval_loss": 0.39117440581321716,
880
- "eval_runtime": 1.1647,
881
- "eval_samples_per_second": 84.141,
882
- "eval_steps_per_second": 0.859,
883
  "step": 90
884
  },
885
  {
886
  "epoch": 91.0,
887
- "eval_accuracy": 0.8163265306122449,
888
- "eval_loss": 0.4017205238342285,
889
- "eval_runtime": 1.1624,
890
- "eval_samples_per_second": 84.311,
891
- "eval_steps_per_second": 0.86,
892
  "step": 91
893
  },
894
  {
895
  "epoch": 92.0,
896
- "eval_accuracy": 0.8061224489795918,
897
- "eval_loss": 0.4331374168395996,
898
- "eval_runtime": 1.2042,
899
- "eval_samples_per_second": 81.381,
900
  "eval_steps_per_second": 0.83,
901
  "step": 92
902
  },
903
  {
904
  "epoch": 93.0,
905
- "eval_accuracy": 0.7959183673469388,
906
- "eval_loss": 0.40544551610946655,
907
- "eval_runtime": 1.1592,
908
- "eval_samples_per_second": 84.544,
909
- "eval_steps_per_second": 0.863,
910
  "step": 93
911
  },
912
  {
913
  "epoch": 94.0,
914
- "eval_accuracy": 0.8367346938775511,
915
- "eval_loss": 0.3894374370574951,
916
- "eval_runtime": 1.156,
917
- "eval_samples_per_second": 84.777,
918
- "eval_steps_per_second": 0.865,
919
  "step": 94
920
  },
921
  {
922
  "epoch": 95.0,
923
- "eval_accuracy": 0.826530612244898,
924
- "eval_loss": 0.4080319404602051,
925
- "eval_runtime": 1.1486,
926
- "eval_samples_per_second": 85.321,
927
- "eval_steps_per_second": 0.871,
928
  "step": 95
929
  },
930
  {
931
  "epoch": 96.0,
932
- "eval_accuracy": 0.826530612244898,
933
- "eval_loss": 0.40165218710899353,
934
- "eval_runtime": 1.1675,
935
- "eval_samples_per_second": 83.937,
936
- "eval_steps_per_second": 0.856,
937
  "step": 96
938
  },
939
  {
940
  "epoch": 97.0,
941
- "eval_accuracy": 0.8367346938775511,
942
- "eval_loss": 0.4095234274864197,
943
- "eval_runtime": 1.164,
944
- "eval_samples_per_second": 84.193,
945
- "eval_steps_per_second": 0.859,
946
  "step": 97
947
  },
948
  {
949
  "epoch": 98.0,
950
- "eval_accuracy": 0.8163265306122449,
951
- "eval_loss": 0.42752060294151306,
952
- "eval_runtime": 1.1839,
953
- "eval_samples_per_second": 82.779,
954
- "eval_steps_per_second": 0.845,
955
  "step": 98
956
  },
957
  {
958
  "epoch": 99.0,
959
- "eval_accuracy": 0.826530612244898,
960
- "eval_loss": 0.40013304352760315,
961
- "eval_runtime": 1.1637,
962
- "eval_samples_per_second": 84.215,
963
- "eval_steps_per_second": 0.859,
964
  "step": 99
965
  },
966
  {
967
  "epoch": 100.0,
968
- "grad_norm": 0.5911010503768921,
969
  "learning_rate": 1.1111111111111112e-06,
970
- "loss": 0.3955,
971
  "step": 100
972
  },
973
  {
974
  "epoch": 100.0,
975
- "eval_accuracy": 0.8061224489795918,
976
- "eval_loss": 0.4125175476074219,
977
- "eval_runtime": 1.1372,
978
- "eval_samples_per_second": 86.179,
979
- "eval_steps_per_second": 0.879,
980
  "step": 100
981
  },
982
  {
983
  "epoch": 100.0,
984
  "step": 100,
985
  "total_flos": 2.86484619552768e+17,
986
- "train_loss": 0.49483771085739137,
987
- "train_runtime": 1046.6976,
988
- "train_samples_per_second": 75.571,
989
  "train_steps_per_second": 0.096
990
  }
991
  ],
 
1
  {
2
+ "best_global_step": 59,
3
+ "best_metric": 0.8877551020408163,
4
+ "best_model_checkpoint": "efficientnet-b0-accidents/checkpoint-59",
5
  "epoch": 100.0,
6
  "eval_steps": 500,
7
  "global_step": 100,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.7959183673469388,
15
+ "eval_loss": 0.4779815375804901,
16
+ "eval_runtime": 1.1795,
17
+ "eval_samples_per_second": 83.084,
18
+ "eval_steps_per_second": 0.848,
19
  "step": 1
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.7959183673469388,
24
+ "eval_loss": 0.46097320318222046,
25
+ "eval_runtime": 1.1909,
26
+ "eval_samples_per_second": 82.291,
27
+ "eval_steps_per_second": 0.84,
28
  "step": 2
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.7755102040816326,
33
+ "eval_loss": 0.45180079340934753,
34
+ "eval_runtime": 1.1886,
35
+ "eval_samples_per_second": 82.451,
36
+ "eval_steps_per_second": 0.841,
37
  "step": 3
38
  },
39
  {
40
  "epoch": 4.0,
41
+ "eval_accuracy": 0.8163265306122449,
42
+ "eval_loss": 0.44495007395744324,
43
+ "eval_runtime": 1.1829,
44
+ "eval_samples_per_second": 82.845,
45
+ "eval_steps_per_second": 0.845,
46
  "step": 4
47
  },
48
  {
49
  "epoch": 5.0,
50
+ "eval_accuracy": 0.826530612244898,
51
+ "eval_loss": 0.44236451387405396,
52
+ "eval_runtime": 1.1745,
53
+ "eval_samples_per_second": 83.437,
54
+ "eval_steps_per_second": 0.851,
55
  "step": 5
56
  },
57
  {
58
  "epoch": 6.0,
59
+ "eval_accuracy": 0.7959183673469388,
60
+ "eval_loss": 0.44830843806266785,
61
+ "eval_runtime": 1.1934,
62
+ "eval_samples_per_second": 82.117,
63
+ "eval_steps_per_second": 0.838,
64
  "step": 6
65
  },
66
  {
67
  "epoch": 7.0,
68
+ "eval_accuracy": 0.7959183673469388,
69
+ "eval_loss": 0.4532928168773651,
70
+ "eval_runtime": 1.2016,
71
+ "eval_samples_per_second": 81.561,
72
+ "eval_steps_per_second": 0.832,
73
  "step": 7
74
  },
75
  {
76
  "epoch": 8.0,
77
+ "eval_accuracy": 0.7959183673469388,
78
+ "eval_loss": 0.4556555151939392,
79
+ "eval_runtime": 1.1751,
80
+ "eval_samples_per_second": 83.399,
81
+ "eval_steps_per_second": 0.851,
82
  "step": 8
83
  },
84
  {
85
  "epoch": 9.0,
86
+ "eval_accuracy": 0.826530612244898,
87
+ "eval_loss": 0.4555840492248535,
88
+ "eval_runtime": 1.1859,
89
+ "eval_samples_per_second": 82.637,
90
+ "eval_steps_per_second": 0.843,
91
  "step": 9
92
  },
93
  {
94
  "epoch": 10.0,
95
+ "grad_norm": 0.6532824635505676,
96
  "learning_rate": 9e-05,
97
+ "loss": 0.4528,
98
  "step": 10
99
  },
100
  {
101
  "epoch": 10.0,
102
+ "eval_accuracy": 0.8163265306122449,
103
+ "eval_loss": 0.4452582001686096,
104
+ "eval_runtime": 1.1757,
105
+ "eval_samples_per_second": 83.352,
106
+ "eval_steps_per_second": 0.851,
107
  "step": 10
108
  },
109
  {
110
  "epoch": 11.0,
111
+ "eval_accuracy": 0.7755102040816326,
112
+ "eval_loss": 0.4558465778827667,
113
+ "eval_runtime": 1.1787,
114
+ "eval_samples_per_second": 83.141,
115
+ "eval_steps_per_second": 0.848,
116
  "step": 11
117
  },
118
  {
119
  "epoch": 12.0,
120
+ "eval_accuracy": 0.826530612244898,
121
+ "eval_loss": 0.4390490651130676,
122
+ "eval_runtime": 1.1827,
123
+ "eval_samples_per_second": 82.862,
124
+ "eval_steps_per_second": 0.846,
125
  "step": 12
126
  },
127
  {
128
  "epoch": 13.0,
129
+ "eval_accuracy": 0.7959183673469388,
130
+ "eval_loss": 0.4321656823158264,
131
+ "eval_runtime": 1.1951,
132
+ "eval_samples_per_second": 81.999,
133
+ "eval_steps_per_second": 0.837,
134
  "step": 13
135
  },
136
  {
137
  "epoch": 14.0,
138
+ "eval_accuracy": 0.8163265306122449,
139
+ "eval_loss": 0.4322623908519745,
140
+ "eval_runtime": 1.1728,
141
+ "eval_samples_per_second": 83.56,
142
+ "eval_steps_per_second": 0.853,
143
  "step": 14
144
  },
145
  {
146
  "epoch": 15.0,
147
+ "eval_accuracy": 0.8061224489795918,
148
+ "eval_loss": 0.4126739203929901,
149
+ "eval_runtime": 1.179,
150
+ "eval_samples_per_second": 83.121,
151
+ "eval_steps_per_second": 0.848,
152
  "step": 15
153
  },
154
  {
155
  "epoch": 16.0,
156
+ "eval_accuracy": 0.8061224489795918,
157
+ "eval_loss": 0.4341281056404114,
158
+ "eval_runtime": 1.1827,
159
+ "eval_samples_per_second": 82.862,
160
+ "eval_steps_per_second": 0.846,
161
  "step": 16
162
  },
163
  {
164
  "epoch": 17.0,
165
+ "eval_accuracy": 0.826530612244898,
166
+ "eval_loss": 0.4143841862678528,
167
+ "eval_runtime": 1.174,
168
+ "eval_samples_per_second": 83.472,
169
+ "eval_steps_per_second": 0.852,
170
  "step": 17
171
  },
172
  {
173
  "epoch": 18.0,
174
+ "eval_accuracy": 0.826530612244898,
175
+ "eval_loss": 0.4274652302265167,
176
+ "eval_runtime": 1.1859,
177
+ "eval_samples_per_second": 82.636,
178
+ "eval_steps_per_second": 0.843,
179
  "step": 18
180
  },
181
  {
182
  "epoch": 19.0,
183
+ "eval_accuracy": 0.8673469387755102,
184
+ "eval_loss": 0.39877012372016907,
185
+ "eval_runtime": 1.1702,
186
+ "eval_samples_per_second": 83.75,
187
+ "eval_steps_per_second": 0.855,
188
  "step": 19
189
  },
190
  {
191
  "epoch": 20.0,
192
+ "grad_norm": 0.636593222618103,
193
  "learning_rate": 9e-05,
194
+ "loss": 0.4233,
195
  "step": 20
196
  },
197
  {
198
  "epoch": 20.0,
199
+ "eval_accuracy": 0.7959183673469388,
200
+ "eval_loss": 0.4209805428981781,
201
+ "eval_runtime": 1.1739,
202
+ "eval_samples_per_second": 83.481,
203
+ "eval_steps_per_second": 0.852,
204
  "step": 20
205
  },
206
  {
207
  "epoch": 21.0,
208
+ "eval_accuracy": 0.7755102040816326,
209
+ "eval_loss": 0.42234739661216736,
210
+ "eval_runtime": 1.242,
211
+ "eval_samples_per_second": 78.908,
212
+ "eval_steps_per_second": 0.805,
213
  "step": 21
214
  },
215
  {
216
  "epoch": 22.0,
217
+ "eval_accuracy": 0.826530612244898,
218
+ "eval_loss": 0.4287910461425781,
219
+ "eval_runtime": 1.1884,
220
+ "eval_samples_per_second": 82.462,
221
+ "eval_steps_per_second": 0.841,
222
  "step": 22
223
  },
224
  {
225
  "epoch": 23.0,
226
+ "eval_accuracy": 0.8571428571428571,
227
+ "eval_loss": 0.3851400911808014,
228
+ "eval_runtime": 1.171,
229
+ "eval_samples_per_second": 83.691,
230
+ "eval_steps_per_second": 0.854,
231
  "step": 23
232
  },
233
  {
234
  "epoch": 24.0,
235
+ "eval_accuracy": 0.8061224489795918,
236
+ "eval_loss": 0.3956393897533417,
237
+ "eval_runtime": 1.1758,
238
+ "eval_samples_per_second": 83.349,
239
+ "eval_steps_per_second": 0.85,
240
  "step": 24
241
  },
242
  {
243
  "epoch": 25.0,
244
+ "eval_accuracy": 0.8367346938775511,
245
+ "eval_loss": 0.4159245789051056,
246
+ "eval_runtime": 1.1679,
247
+ "eval_samples_per_second": 83.915,
248
+ "eval_steps_per_second": 0.856,
249
  "step": 25
250
  },
251
  {
252
  "epoch": 26.0,
253
+ "eval_accuracy": 0.8163265306122449,
254
+ "eval_loss": 0.4054996073246002,
255
+ "eval_runtime": 1.1741,
256
+ "eval_samples_per_second": 83.467,
257
+ "eval_steps_per_second": 0.852,
258
  "step": 26
259
  },
260
  {
261
  "epoch": 27.0,
262
+ "eval_accuracy": 0.8163265306122449,
263
+ "eval_loss": 0.3861065208911896,
264
+ "eval_runtime": 1.191,
265
+ "eval_samples_per_second": 82.286,
266
+ "eval_steps_per_second": 0.84,
267
  "step": 27
268
  },
269
  {
270
  "epoch": 28.0,
271
+ "eval_accuracy": 0.8469387755102041,
272
+ "eval_loss": 0.3751261532306671,
273
+ "eval_runtime": 1.1897,
274
+ "eval_samples_per_second": 82.373,
275
+ "eval_steps_per_second": 0.841,
276
  "step": 28
277
  },
278
  {
279
  "epoch": 29.0,
280
+ "eval_accuracy": 0.8367346938775511,
281
+ "eval_loss": 0.39149540662765503,
282
+ "eval_runtime": 1.2043,
283
+ "eval_samples_per_second": 81.378,
284
+ "eval_steps_per_second": 0.83,
285
  "step": 29
286
  },
287
  {
288
  "epoch": 30.0,
289
+ "grad_norm": 0.5932164788246155,
290
  "learning_rate": 7.88888888888889e-05,
291
+ "loss": 0.3846,
292
  "step": 30
293
  },
294
  {
295
  "epoch": 30.0,
296
+ "eval_accuracy": 0.8571428571428571,
297
+ "eval_loss": 0.3704555332660675,
298
+ "eval_runtime": 1.2019,
299
+ "eval_samples_per_second": 81.54,
300
+ "eval_steps_per_second": 0.832,
301
  "step": 30
302
  },
303
  {
304
  "epoch": 31.0,
305
+ "eval_accuracy": 0.8367346938775511,
306
+ "eval_loss": 0.3868422508239746,
307
+ "eval_runtime": 1.1797,
308
+ "eval_samples_per_second": 83.073,
309
+ "eval_steps_per_second": 0.848,
310
  "step": 31
311
  },
312
  {
313
  "epoch": 32.0,
314
+ "eval_accuracy": 0.8469387755102041,
315
+ "eval_loss": 0.37100356817245483,
316
+ "eval_runtime": 1.1859,
317
+ "eval_samples_per_second": 82.641,
318
+ "eval_steps_per_second": 0.843,
319
  "step": 32
320
  },
321
  {
322
  "epoch": 33.0,
323
+ "eval_accuracy": 0.8469387755102041,
324
+ "eval_loss": 0.37700414657592773,
325
+ "eval_runtime": 1.1948,
326
+ "eval_samples_per_second": 82.025,
327
+ "eval_steps_per_second": 0.837,
328
  "step": 33
329
  },
330
  {
331
  "epoch": 34.0,
332
+ "eval_accuracy": 0.826530612244898,
333
+ "eval_loss": 0.39025142788887024,
334
+ "eval_runtime": 1.1986,
335
+ "eval_samples_per_second": 81.762,
336
+ "eval_steps_per_second": 0.834,
337
  "step": 34
338
  },
339
  {
340
  "epoch": 35.0,
341
+ "eval_accuracy": 0.8469387755102041,
342
+ "eval_loss": 0.3864216208457947,
343
+ "eval_runtime": 1.1828,
344
+ "eval_samples_per_second": 82.852,
345
+ "eval_steps_per_second": 0.845,
346
  "step": 35
347
  },
348
  {
349
  "epoch": 36.0,
350
+ "eval_accuracy": 0.826530612244898,
351
+ "eval_loss": 0.37283435463905334,
352
+ "eval_runtime": 1.1824,
353
+ "eval_samples_per_second": 82.882,
354
+ "eval_steps_per_second": 0.846,
355
  "step": 36
356
  },
357
  {
358
  "epoch": 37.0,
359
+ "eval_accuracy": 0.8367346938775511,
360
+ "eval_loss": 0.37720832228660583,
361
+ "eval_runtime": 1.2152,
362
+ "eval_samples_per_second": 80.648,
363
+ "eval_steps_per_second": 0.823,
364
  "step": 37
365
  },
366
  {
367
  "epoch": 38.0,
368
+ "eval_accuracy": 0.8163265306122449,
369
+ "eval_loss": 0.36333897709846497,
370
+ "eval_runtime": 1.1808,
371
+ "eval_samples_per_second": 82.993,
372
+ "eval_steps_per_second": 0.847,
373
  "step": 38
374
  },
375
  {
376
  "epoch": 39.0,
377
+ "eval_accuracy": 0.8469387755102041,
378
+ "eval_loss": 0.38240504264831543,
379
+ "eval_runtime": 1.1905,
380
+ "eval_samples_per_second": 82.317,
381
+ "eval_steps_per_second": 0.84,
382
  "step": 39
383
  },
384
  {
385
  "epoch": 40.0,
386
+ "grad_norm": 0.6421281099319458,
387
  "learning_rate": 6.777777777777778e-05,
388
+ "loss": 0.3714,
389
  "step": 40
390
  },
391
  {
392
  "epoch": 40.0,
393
+ "eval_accuracy": 0.8571428571428571,
394
+ "eval_loss": 0.3520002067089081,
395
+ "eval_runtime": 1.1819,
396
+ "eval_samples_per_second": 82.917,
397
+ "eval_steps_per_second": 0.846,
398
  "step": 40
399
  },
400
  {
401
  "epoch": 41.0,
402
+ "eval_accuracy": 0.8469387755102041,
403
+ "eval_loss": 0.3843795657157898,
404
+ "eval_runtime": 1.1866,
405
+ "eval_samples_per_second": 82.592,
406
+ "eval_steps_per_second": 0.843,
407
  "step": 41
408
  },
409
  {
410
  "epoch": 42.0,
411
+ "eval_accuracy": 0.8469387755102041,
412
+ "eval_loss": 0.3564036190509796,
413
+ "eval_runtime": 1.1961,
414
+ "eval_samples_per_second": 81.93,
415
+ "eval_steps_per_second": 0.836,
416
  "step": 42
417
  },
418
  {
419
  "epoch": 43.0,
420
+ "eval_accuracy": 0.8673469387755102,
421
+ "eval_loss": 0.3747188150882721,
422
+ "eval_runtime": 1.1718,
423
+ "eval_samples_per_second": 83.63,
424
+ "eval_steps_per_second": 0.853,
425
  "step": 43
426
  },
427
  {
428
  "epoch": 44.0,
429
+ "eval_accuracy": 0.8571428571428571,
430
+ "eval_loss": 0.33950209617614746,
431
+ "eval_runtime": 1.2426,
432
+ "eval_samples_per_second": 78.864,
433
+ "eval_steps_per_second": 0.805,
434
  "step": 44
435
  },
436
  {
437
  "epoch": 45.0,
438
+ "eval_accuracy": 0.8163265306122449,
439
+ "eval_loss": 0.38714832067489624,
440
+ "eval_runtime": 1.2757,
441
+ "eval_samples_per_second": 76.82,
442
+ "eval_steps_per_second": 0.784,
443
  "step": 45
444
  },
445
  {
446
  "epoch": 46.0,
447
+ "eval_accuracy": 0.8367346938775511,
448
+ "eval_loss": 0.348727285861969,
449
+ "eval_runtime": 1.1817,
450
+ "eval_samples_per_second": 82.933,
451
+ "eval_steps_per_second": 0.846,
452
  "step": 46
453
  },
454
  {
455
  "epoch": 47.0,
456
+ "eval_accuracy": 0.8163265306122449,
457
+ "eval_loss": 0.3797769844532013,
458
+ "eval_runtime": 1.1792,
459
+ "eval_samples_per_second": 83.105,
460
+ "eval_steps_per_second": 0.848,
461
  "step": 47
462
  },
463
  {
464
  "epoch": 48.0,
465
+ "eval_accuracy": 0.8367346938775511,
466
+ "eval_loss": 0.38484281301498413,
467
+ "eval_runtime": 1.1845,
468
+ "eval_samples_per_second": 82.735,
469
+ "eval_steps_per_second": 0.844,
470
  "step": 48
471
  },
472
  {
473
  "epoch": 49.0,
474
+ "eval_accuracy": 0.826530612244898,
475
+ "eval_loss": 0.3978123068809509,
476
+ "eval_runtime": 1.1774,
477
+ "eval_samples_per_second": 83.238,
478
+ "eval_steps_per_second": 0.849,
479
  "step": 49
480
  },
481
  {
482
  "epoch": 50.0,
483
+ "grad_norm": 0.7799413204193115,
484
  "learning_rate": 5.666666666666667e-05,
485
+ "loss": 0.3618,
486
  "step": 50
487
  },
488
  {
489
  "epoch": 50.0,
490
+ "eval_accuracy": 0.8571428571428571,
491
+ "eval_loss": 0.33839166164398193,
492
+ "eval_runtime": 1.1751,
493
+ "eval_samples_per_second": 83.397,
494
+ "eval_steps_per_second": 0.851,
495
  "step": 50
496
  },
497
  {
498
  "epoch": 51.0,
499
+ "eval_accuracy": 0.826530612244898,
500
+ "eval_loss": 0.36474189162254333,
501
+ "eval_runtime": 1.1761,
502
+ "eval_samples_per_second": 83.326,
503
+ "eval_steps_per_second": 0.85,
504
  "step": 51
505
  },
506
  {
507
  "epoch": 52.0,
508
+ "eval_accuracy": 0.8571428571428571,
509
+ "eval_loss": 0.35444045066833496,
510
+ "eval_runtime": 1.2058,
511
+ "eval_samples_per_second": 81.274,
512
+ "eval_steps_per_second": 0.829,
513
  "step": 52
514
  },
515
  {
516
  "epoch": 53.0,
517
+ "eval_accuracy": 0.8163265306122449,
518
+ "eval_loss": 0.42885449528694153,
519
+ "eval_runtime": 1.1966,
520
+ "eval_samples_per_second": 81.895,
521
+ "eval_steps_per_second": 0.836,
522
  "step": 53
523
  },
524
  {
525
  "epoch": 54.0,
526
+ "eval_accuracy": 0.8673469387755102,
527
+ "eval_loss": 0.3567652404308319,
528
+ "eval_runtime": 1.1882,
529
+ "eval_samples_per_second": 82.48,
530
+ "eval_steps_per_second": 0.842,
531
  "step": 54
532
  },
533
  {
534
  "epoch": 55.0,
535
+ "eval_accuracy": 0.8673469387755102,
536
+ "eval_loss": 0.37271520495414734,
537
+ "eval_runtime": 1.1895,
538
+ "eval_samples_per_second": 82.388,
539
+ "eval_steps_per_second": 0.841,
540
  "step": 55
541
  },
542
  {
543
  "epoch": 56.0,
544
+ "eval_accuracy": 0.826530612244898,
545
+ "eval_loss": 0.3796241879463196,
546
+ "eval_runtime": 1.1802,
547
+ "eval_samples_per_second": 83.037,
548
+ "eval_steps_per_second": 0.847,
549
  "step": 56
550
  },
551
  {
552
  "epoch": 57.0,
553
+ "eval_accuracy": 0.8571428571428571,
554
+ "eval_loss": 0.36781173944473267,
555
+ "eval_runtime": 1.1792,
556
+ "eval_samples_per_second": 83.11,
557
+ "eval_steps_per_second": 0.848,
558
  "step": 57
559
  },
560
  {
561
  "epoch": 58.0,
562
+ "eval_accuracy": 0.8469387755102041,
563
+ "eval_loss": 0.3718703091144562,
564
+ "eval_runtime": 1.1805,
565
+ "eval_samples_per_second": 83.012,
566
+ "eval_steps_per_second": 0.847,
567
  "step": 58
568
  },
569
  {
570
  "epoch": 59.0,
571
+ "eval_accuracy": 0.8877551020408163,
572
+ "eval_loss": 0.38076311349868774,
573
+ "eval_runtime": 1.1858,
574
+ "eval_samples_per_second": 82.643,
575
+ "eval_steps_per_second": 0.843,
576
  "step": 59
577
  },
578
  {
579
  "epoch": 60.0,
580
+ "grad_norm": 0.8338537216186523,
581
  "learning_rate": 4.555555555555556e-05,
582
+ "loss": 0.327,
583
  "step": 60
584
  },
585
  {
586
  "epoch": 60.0,
587
+ "eval_accuracy": 0.8163265306122449,
588
+ "eval_loss": 0.3783416748046875,
589
+ "eval_runtime": 1.2706,
590
+ "eval_samples_per_second": 77.13,
591
+ "eval_steps_per_second": 0.787,
592
  "step": 60
593
  },
594
  {
595
  "epoch": 61.0,
596
+ "eval_accuracy": 0.8367346938775511,
597
+ "eval_loss": 0.363650381565094,
598
+ "eval_runtime": 1.2014,
599
+ "eval_samples_per_second": 81.574,
600
+ "eval_steps_per_second": 0.832,
601
  "step": 61
602
  },
603
  {
604
  "epoch": 62.0,
605
+ "eval_accuracy": 0.8367346938775511,
606
+ "eval_loss": 0.37426507472991943,
607
+ "eval_runtime": 1.1766,
608
+ "eval_samples_per_second": 83.293,
609
+ "eval_steps_per_second": 0.85,
610
  "step": 62
611
  },
612
  {
613
  "epoch": 63.0,
614
+ "eval_accuracy": 0.8571428571428571,
615
+ "eval_loss": 0.3553648591041565,
616
+ "eval_runtime": 1.1889,
617
+ "eval_samples_per_second": 82.429,
618
+ "eval_steps_per_second": 0.841,
619
  "step": 63
620
  },
621
  {
622
  "epoch": 64.0,
623
+ "eval_accuracy": 0.826530612244898,
624
+ "eval_loss": 0.3544183075428009,
625
+ "eval_runtime": 1.1948,
626
+ "eval_samples_per_second": 82.02,
627
+ "eval_steps_per_second": 0.837,
628
  "step": 64
629
  },
630
  {
631
  "epoch": 65.0,
632
+ "eval_accuracy": 0.8469387755102041,
633
+ "eval_loss": 0.361508309841156,
634
+ "eval_runtime": 1.1861,
635
+ "eval_samples_per_second": 82.624,
636
+ "eval_steps_per_second": 0.843,
637
  "step": 65
638
  },
639
  {
640
  "epoch": 66.0,
641
+ "eval_accuracy": 0.8673469387755102,
642
+ "eval_loss": 0.3502516448497772,
643
+ "eval_runtime": 1.1913,
644
+ "eval_samples_per_second": 82.261,
645
+ "eval_steps_per_second": 0.839,
646
  "step": 66
647
  },
648
  {
649
  "epoch": 67.0,
650
+ "eval_accuracy": 0.7959183673469388,
651
+ "eval_loss": 0.39142534136772156,
652
+ "eval_runtime": 1.1829,
653
+ "eval_samples_per_second": 82.845,
654
+ "eval_steps_per_second": 0.845,
655
  "step": 67
656
  },
657
  {
658
  "epoch": 68.0,
659
+ "eval_accuracy": 0.8367346938775511,
660
+ "eval_loss": 0.3686521351337433,
661
+ "eval_runtime": 1.2512,
662
+ "eval_samples_per_second": 78.327,
663
+ "eval_steps_per_second": 0.799,
664
  "step": 68
665
  },
666
  {
667
  "epoch": 69.0,
668
+ "eval_accuracy": 0.8877551020408163,
669
+ "eval_loss": 0.3295800983905792,
670
+ "eval_runtime": 1.1917,
671
+ "eval_samples_per_second": 82.233,
672
+ "eval_steps_per_second": 0.839,
673
  "step": 69
674
  },
675
  {
676
  "epoch": 70.0,
677
+ "grad_norm": 0.4433494806289673,
678
  "learning_rate": 3.444444444444445e-05,
679
+ "loss": 0.3136,
680
  "step": 70
681
  },
682
  {
683
  "epoch": 70.0,
684
+ "eval_accuracy": 0.8571428571428571,
685
+ "eval_loss": 0.35484427213668823,
686
+ "eval_runtime": 1.1759,
687
+ "eval_samples_per_second": 83.337,
688
+ "eval_steps_per_second": 0.85,
689
  "step": 70
690
  },
691
  {
692
  "epoch": 71.0,
693
+ "eval_accuracy": 0.826530612244898,
694
+ "eval_loss": 0.3809606730937958,
695
+ "eval_runtime": 1.175,
696
+ "eval_samples_per_second": 83.406,
697
+ "eval_steps_per_second": 0.851,
698
  "step": 71
699
  },
700
  {
701
  "epoch": 72.0,
702
+ "eval_accuracy": 0.8469387755102041,
703
+ "eval_loss": 0.3522069752216339,
704
+ "eval_runtime": 1.169,
705
+ "eval_samples_per_second": 83.829,
706
+ "eval_steps_per_second": 0.855,
707
  "step": 72
708
  },
709
  {
710
  "epoch": 73.0,
711
+ "eval_accuracy": 0.8367346938775511,
712
+ "eval_loss": 0.3851645290851593,
713
+ "eval_runtime": 1.1768,
714
+ "eval_samples_per_second": 83.277,
715
+ "eval_steps_per_second": 0.85,
716
  "step": 73
717
  },
718
  {
719
  "epoch": 74.0,
720
+ "eval_accuracy": 0.8571428571428571,
721
+ "eval_loss": 0.34336620569229126,
722
+ "eval_runtime": 1.1784,
723
+ "eval_samples_per_second": 83.163,
724
+ "eval_steps_per_second": 0.849,
725
  "step": 74
726
  },
727
  {
728
  "epoch": 75.0,
729
+ "eval_accuracy": 0.8571428571428571,
730
+ "eval_loss": 0.35957837104797363,
731
+ "eval_runtime": 1.2169,
732
+ "eval_samples_per_second": 80.535,
733
+ "eval_steps_per_second": 0.822,
734
  "step": 75
735
  },
736
  {
737
  "epoch": 76.0,
738
+ "eval_accuracy": 0.8367346938775511,
739
+ "eval_loss": 0.3550688624382019,
740
+ "eval_runtime": 1.2477,
741
+ "eval_samples_per_second": 78.542,
742
+ "eval_steps_per_second": 0.801,
743
  "step": 76
744
  },
745
  {
746
  "epoch": 77.0,
747
+ "eval_accuracy": 0.8163265306122449,
748
+ "eval_loss": 0.42566972970962524,
749
+ "eval_runtime": 1.1764,
750
+ "eval_samples_per_second": 83.303,
751
+ "eval_steps_per_second": 0.85,
752
  "step": 77
753
  },
754
  {
755
  "epoch": 78.0,
756
  "eval_accuracy": 0.8367346938775511,
757
+ "eval_loss": 0.3554403781890869,
758
+ "eval_runtime": 1.1844,
759
+ "eval_samples_per_second": 82.741,
760
+ "eval_steps_per_second": 0.844,
761
  "step": 78
762
  },
763
  {
764
  "epoch": 79.0,
765
+ "eval_accuracy": 0.826530612244898,
766
+ "eval_loss": 0.3352053165435791,
767
+ "eval_runtime": 1.1717,
768
+ "eval_samples_per_second": 83.636,
769
  "eval_steps_per_second": 0.853,
770
  "step": 79
771
  },
772
  {
773
  "epoch": 80.0,
774
+ "grad_norm": 0.825162947177887,
775
  "learning_rate": 2.3333333333333336e-05,
776
+ "loss": 0.316,
777
  "step": 80
778
  },
779
  {
780
  "epoch": 80.0,
781
+ "eval_accuracy": 0.8367346938775511,
782
+ "eval_loss": 0.3773196041584015,
783
+ "eval_runtime": 1.1893,
784
+ "eval_samples_per_second": 82.399,
785
+ "eval_steps_per_second": 0.841,
786
  "step": 80
787
  },
788
  {
789
  "epoch": 81.0,
790
+ "eval_accuracy": 0.8469387755102041,
791
+ "eval_loss": 0.33054399490356445,
792
+ "eval_runtime": 1.1987,
793
+ "eval_samples_per_second": 81.752,
794
+ "eval_steps_per_second": 0.834,
795
  "step": 81
796
  },
797
  {
798
  "epoch": 82.0,
799
+ "eval_accuracy": 0.8571428571428571,
800
+ "eval_loss": 0.3614092171192169,
801
+ "eval_runtime": 1.1892,
802
+ "eval_samples_per_second": 82.41,
803
+ "eval_steps_per_second": 0.841,
804
  "step": 82
805
  },
806
  {
807
  "epoch": 83.0,
808
  "eval_accuracy": 0.826530612244898,
809
+ "eval_loss": 0.3490673005580902,
810
+ "eval_runtime": 1.2082,
811
+ "eval_samples_per_second": 81.111,
812
+ "eval_steps_per_second": 0.828,
813
  "step": 83
814
  },
815
  {
816
  "epoch": 84.0,
817
+ "eval_accuracy": 0.8571428571428571,
818
+ "eval_loss": 0.34793397784233093,
819
+ "eval_runtime": 1.1918,
820
+ "eval_samples_per_second": 82.229,
821
+ "eval_steps_per_second": 0.839,
822
  "step": 84
823
  },
824
  {
825
  "epoch": 85.0,
826
+ "eval_accuracy": 0.8367346938775511,
827
+ "eval_loss": 0.36838239431381226,
828
+ "eval_runtime": 1.1842,
829
+ "eval_samples_per_second": 82.758,
830
+ "eval_steps_per_second": 0.844,
831
  "step": 85
832
  },
833
  {
834
  "epoch": 86.0,
835
+ "eval_accuracy": 0.8571428571428571,
836
+ "eval_loss": 0.35111716389656067,
837
+ "eval_runtime": 1.1928,
838
+ "eval_samples_per_second": 82.162,
839
+ "eval_steps_per_second": 0.838,
840
  "step": 86
841
  },
842
  {
843
  "epoch": 87.0,
844
+ "eval_accuracy": 0.826530612244898,
845
+ "eval_loss": 0.36582064628601074,
846
+ "eval_runtime": 1.2026,
847
+ "eval_samples_per_second": 81.493,
848
+ "eval_steps_per_second": 0.832,
849
  "step": 87
850
  },
851
  {
852
  "epoch": 88.0,
853
+ "eval_accuracy": 0.8367346938775511,
854
+ "eval_loss": 0.3332655727863312,
855
+ "eval_runtime": 1.1993,
856
+ "eval_samples_per_second": 81.713,
857
+ "eval_steps_per_second": 0.834,
858
  "step": 88
859
  },
860
  {
861
  "epoch": 89.0,
862
+ "eval_accuracy": 0.8775510204081632,
863
+ "eval_loss": 0.3584078550338745,
864
+ "eval_runtime": 1.1944,
865
+ "eval_samples_per_second": 82.048,
866
+ "eval_steps_per_second": 0.837,
867
  "step": 89
868
  },
869
  {
870
  "epoch": 90.0,
871
+ "grad_norm": 0.8733311295509338,
872
  "learning_rate": 1.2222222222222222e-05,
873
+ "loss": 0.3089,
874
  "step": 90
875
  },
876
  {
877
  "epoch": 90.0,
878
+ "eval_accuracy": 0.8571428571428571,
879
+ "eval_loss": 0.3277149498462677,
880
+ "eval_runtime": 1.1993,
881
+ "eval_samples_per_second": 81.712,
882
+ "eval_steps_per_second": 0.834,
883
  "step": 90
884
  },
885
  {
886
  "epoch": 91.0,
887
+ "eval_accuracy": 0.8367346938775511,
888
+ "eval_loss": 0.3874940276145935,
889
+ "eval_runtime": 1.236,
890
+ "eval_samples_per_second": 79.287,
891
+ "eval_steps_per_second": 0.809,
892
  "step": 91
893
  },
894
  {
895
  "epoch": 92.0,
896
+ "eval_accuracy": 0.8367346938775511,
897
+ "eval_loss": 0.3757161796092987,
898
+ "eval_runtime": 1.2043,
899
+ "eval_samples_per_second": 81.378,
900
  "eval_steps_per_second": 0.83,
901
  "step": 92
902
  },
903
  {
904
  "epoch": 93.0,
905
+ "eval_accuracy": 0.8367346938775511,
906
+ "eval_loss": 0.34884124994277954,
907
+ "eval_runtime": 1.1848,
908
+ "eval_samples_per_second": 82.716,
909
+ "eval_steps_per_second": 0.844,
910
  "step": 93
911
  },
912
  {
913
  "epoch": 94.0,
914
+ "eval_accuracy": 0.8571428571428571,
915
+ "eval_loss": 0.32820624113082886,
916
+ "eval_runtime": 1.1771,
917
+ "eval_samples_per_second": 83.253,
918
+ "eval_steps_per_second": 0.85,
919
  "step": 94
920
  },
921
  {
922
  "epoch": 95.0,
923
+ "eval_accuracy": 0.8571428571428571,
924
+ "eval_loss": 0.36127328872680664,
925
+ "eval_runtime": 1.1851,
926
+ "eval_samples_per_second": 82.69,
927
+ "eval_steps_per_second": 0.844,
928
  "step": 95
929
  },
930
  {
931
  "epoch": 96.0,
932
+ "eval_accuracy": 0.8469387755102041,
933
+ "eval_loss": 0.3753064274787903,
934
+ "eval_runtime": 1.1883,
935
+ "eval_samples_per_second": 82.468,
936
+ "eval_steps_per_second": 0.842,
937
  "step": 96
938
  },
939
  {
940
  "epoch": 97.0,
941
+ "eval_accuracy": 0.8469387755102041,
942
+ "eval_loss": 0.3625222146511078,
943
+ "eval_runtime": 1.1837,
944
+ "eval_samples_per_second": 82.79,
945
+ "eval_steps_per_second": 0.845,
946
  "step": 97
947
  },
948
  {
949
  "epoch": 98.0,
950
+ "eval_accuracy": 0.826530612244898,
951
+ "eval_loss": 0.39299532771110535,
952
+ "eval_runtime": 1.1901,
953
+ "eval_samples_per_second": 82.345,
954
+ "eval_steps_per_second": 0.84,
955
  "step": 98
956
  },
957
  {
958
  "epoch": 99.0,
959
+ "eval_accuracy": 0.8469387755102041,
960
+ "eval_loss": 0.33383709192276,
961
+ "eval_runtime": 1.2031,
962
+ "eval_samples_per_second": 81.453,
963
+ "eval_steps_per_second": 0.831,
964
  "step": 99
965
  },
966
  {
967
  "epoch": 100.0,
968
+ "grad_norm": 0.7086212038993835,
969
  "learning_rate": 1.1111111111111112e-06,
970
+ "loss": 0.3131,
971
  "step": 100
972
  },
973
  {
974
  "epoch": 100.0,
975
+ "eval_accuracy": 0.8367346938775511,
976
+ "eval_loss": 0.3329985439777374,
977
+ "eval_runtime": 1.1868,
978
+ "eval_samples_per_second": 82.575,
979
+ "eval_steps_per_second": 0.843,
980
  "step": 100
981
  },
982
  {
983
  "epoch": 100.0,
984
  "step": 100,
985
  "total_flos": 2.86484619552768e+17,
986
+ "train_loss": 0.35724998712539674,
987
+ "train_runtime": 1042.9816,
988
+ "train_samples_per_second": 75.84,
989
  "train_steps_per_second": 0.096
990
  }
991
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b82afc7b8b054f7658f49beed0450e25404a6a11425a8d3abe72e5c833914824
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228cdccd6eb86df0bca52bd52b308ced5d7448628304d7e0167d2ea79c4a75f9
3
  size 5368