2nzi commited on
Commit
fed1adc
·
verified ·
1 Parent(s): e4a9ea6

Model save

Browse files
Files changed (5) hide show
  1. README.md +7 -7
  2. all_results.json +5 -5
  3. test_results.json +5 -5
  4. trainer_state.json +229 -229
  5. val_results.json +5 -5
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [facebook/timesformer-base-finetuned-k400](https://huggingface.co/facebook/timesformer-base-finetuned-k400) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.5500
21
- - Accuracy: 0.8548
22
 
23
  ## Model description
24
 
@@ -50,11 +50,11 @@ The following hyperparameters were used during training:
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
  |:-------------:|:------:|:----:|:---------------:|:--------:|
53
- | 0.4309 | 0.2011 | 186 | 0.6522 | 0.7220 |
54
- | 0.6819 | 1.2011 | 372 | 0.7019 | 0.7552 |
55
- | 0.5011 | 2.2011 | 558 | 0.5440 | 0.8216 |
56
- | 0.3274 | 3.2011 | 744 | 0.6834 | 0.8257 |
57
- | 0.0083 | 4.1957 | 925 | 0.5500 | 0.8548 |
58
 
59
 
60
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [facebook/timesformer-base-finetuned-k400](https://huggingface.co/facebook/timesformer-base-finetuned-k400) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.6192
21
+ - Accuracy: 0.8382
22
 
23
  ## Model description
24
 
 
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
  |:-------------:|:------:|:----:|:---------------:|:--------:|
53
+ | 0.4491 | 0.2011 | 186 | 0.6939 | 0.7386 |
54
+ | 0.5627 | 1.2011 | 372 | 0.6806 | 0.7759 |
55
+ | 0.5189 | 2.2011 | 558 | 0.6510 | 0.8174 |
56
+ | 0.2503 | 3.2011 | 744 | 0.6732 | 0.8174 |
57
+ | 0.0159 | 4.1957 | 925 | 0.6192 | 0.8382 |
58
 
59
 
60
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.195675675675676,
3
- "eval_accuracy": 0.8547717842323651,
4
- "eval_loss": 0.5500420928001404,
5
- "eval_runtime": 434.3892,
6
- "eval_samples_per_second": 0.555,
7
- "eval_steps_per_second": 0.14
8
  }
 
1
  {
2
  "epoch": 4.195675675675676,
3
+ "eval_accuracy": 0.8381742738589212,
4
+ "eval_loss": 0.6192311644554138,
5
+ "eval_runtime": 391.274,
6
+ "eval_samples_per_second": 0.616,
7
+ "eval_steps_per_second": 0.156
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.195675675675676,
3
- "eval_accuracy": 0.8269662921348314,
4
- "eval_loss": 0.7350317239761353,
5
- "eval_runtime": 796.0622,
6
- "eval_samples_per_second": 0.559,
7
- "eval_steps_per_second": 0.141
8
  }
 
1
  {
2
  "epoch": 4.195675675675676,
3
+ "eval_accuracy": 0.8134831460674158,
4
+ "eval_loss": 0.7141955494880676,
5
+ "eval_runtime": 720.0647,
6
+ "eval_samples_per_second": 0.618,
7
+ "eval_steps_per_second": 0.156
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8547717842323651,
3
  "best_model_checkpoint": "videomae-timesformer-surf-analytics\\checkpoint-925",
4
  "epoch": 4.195675675675676,
5
  "eval_steps": 500,
@@ -10,727 +10,727 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.010810810810810811,
13
- "grad_norm": 10.205573081970215,
14
  "learning_rate": 5.376344086021506e-06,
15
- "loss": 1.5305,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.021621621621621623,
20
- "grad_norm": 11.725934028625488,
21
  "learning_rate": 1.0752688172043012e-05,
22
- "loss": 1.2199,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.032432432432432434,
27
- "grad_norm": 6.974359035491943,
28
  "learning_rate": 1.6129032258064517e-05,
29
- "loss": 1.281,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.043243243243243246,
34
- "grad_norm": 7.546658515930176,
35
  "learning_rate": 2.1505376344086024e-05,
36
- "loss": 1.0878,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.05405405405405406,
41
- "grad_norm": 9.739876747131348,
42
  "learning_rate": 2.6881720430107527e-05,
43
- "loss": 1.133,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.06486486486486487,
48
- "grad_norm": 14.410557746887207,
49
  "learning_rate": 3.2258064516129034e-05,
50
- "loss": 1.049,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.07567567567567568,
55
- "grad_norm": 8.305644989013672,
56
  "learning_rate": 3.763440860215054e-05,
57
- "loss": 1.0185,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.08648648648648649,
62
- "grad_norm": 5.283512115478516,
63
  "learning_rate": 4.301075268817205e-05,
64
- "loss": 0.6385,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.0972972972972973,
69
- "grad_norm": 9.341029167175293,
70
  "learning_rate": 4.8387096774193554e-05,
71
- "loss": 0.7407,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.10810810810810811,
76
- "grad_norm": 6.474923610687256,
77
  "learning_rate": 4.957932692307692e-05,
78
- "loss": 0.5965,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.11891891891891893,
83
- "grad_norm": 7.116113662719727,
84
  "learning_rate": 4.897836538461539e-05,
85
- "loss": 0.9715,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.12972972972972974,
90
- "grad_norm": 10.062265396118164,
91
  "learning_rate": 4.8377403846153846e-05,
92
- "loss": 0.5473,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.14054054054054055,
97
- "grad_norm": 32.85737228393555,
98
  "learning_rate": 4.777644230769231e-05,
99
- "loss": 0.8372,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.15135135135135136,
104
- "grad_norm": 16.32477378845215,
105
  "learning_rate": 4.717548076923077e-05,
106
- "loss": 0.653,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.16216216216216217,
111
- "grad_norm": 11.066850662231445,
112
  "learning_rate": 4.657451923076923e-05,
113
- "loss": 0.6233,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.17297297297297298,
118
- "grad_norm": 4.975943565368652,
119
  "learning_rate": 4.5973557692307694e-05,
120
- "loss": 1.0554,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.1837837837837838,
125
- "grad_norm": 11.884200096130371,
126
  "learning_rate": 4.5372596153846156e-05,
127
- "loss": 0.6109,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.1945945945945946,
132
- "grad_norm": 11.737774848937988,
133
  "learning_rate": 4.477163461538462e-05,
134
- "loss": 0.4309,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.20108108108108108,
139
- "eval_accuracy": 0.7219917012448133,
140
- "eval_loss": 0.652153491973877,
141
- "eval_runtime": 361.5654,
142
- "eval_samples_per_second": 0.667,
143
- "eval_steps_per_second": 0.169,
144
  "step": 186
145
  },
146
  {
147
  "epoch": 1.0043243243243243,
148
- "grad_norm": 4.409055709838867,
149
  "learning_rate": 4.417067307692308e-05,
150
- "loss": 0.5466,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.0151351351351352,
155
- "grad_norm": 5.313009262084961,
156
  "learning_rate": 4.3569711538461535e-05,
157
- "loss": 0.0821,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.025945945945946,
162
- "grad_norm": 1.3797563314437866,
163
  "learning_rate": 4.2968750000000004e-05,
164
- "loss": 0.2795,
165
  "step": 210
166
  },
167
  {
168
  "epoch": 1.0367567567567568,
169
- "grad_norm": 1.0455204248428345,
170
  "learning_rate": 4.2367788461538466e-05,
171
- "loss": 0.798,
172
  "step": 220
173
  },
174
  {
175
  "epoch": 1.0475675675675675,
176
- "grad_norm": 17.128549575805664,
177
  "learning_rate": 4.176682692307692e-05,
178
- "loss": 0.3279,
179
  "step": 230
180
  },
181
  {
182
  "epoch": 1.0583783783783784,
183
- "grad_norm": 18.71080207824707,
184
  "learning_rate": 4.116586538461539e-05,
185
- "loss": 0.5378,
186
  "step": 240
187
  },
188
  {
189
  "epoch": 1.0691891891891891,
190
- "grad_norm": 15.340435028076172,
191
  "learning_rate": 4.0564903846153846e-05,
192
- "loss": 0.3799,
193
  "step": 250
194
  },
195
  {
196
  "epoch": 1.08,
197
- "grad_norm": 3.928271532058716,
198
  "learning_rate": 3.996394230769231e-05,
199
- "loss": 0.3517,
200
  "step": 260
201
  },
202
  {
203
  "epoch": 1.0908108108108108,
204
- "grad_norm": 14.76327133178711,
205
  "learning_rate": 3.936298076923077e-05,
206
- "loss": 0.5122,
207
  "step": 270
208
  },
209
  {
210
  "epoch": 1.1016216216216217,
211
- "grad_norm": 18.79231071472168,
212
  "learning_rate": 3.876201923076923e-05,
213
- "loss": 0.4962,
214
  "step": 280
215
  },
216
  {
217
  "epoch": 1.1124324324324324,
218
- "grad_norm": 19.632802963256836,
219
  "learning_rate": 3.8161057692307694e-05,
220
- "loss": 0.5597,
221
  "step": 290
222
  },
223
  {
224
  "epoch": 1.1232432432432433,
225
- "grad_norm": 19.008041381835938,
226
  "learning_rate": 3.7560096153846156e-05,
227
- "loss": 0.3686,
228
  "step": 300
229
  },
230
  {
231
  "epoch": 1.134054054054054,
232
- "grad_norm": 19.428789138793945,
233
  "learning_rate": 3.695913461538462e-05,
234
- "loss": 0.4503,
235
  "step": 310
236
  },
237
  {
238
  "epoch": 1.144864864864865,
239
- "grad_norm": 14.225759506225586,
240
  "learning_rate": 3.635817307692308e-05,
241
- "loss": 0.4095,
242
  "step": 320
243
  },
244
  {
245
  "epoch": 1.1556756756756756,
246
- "grad_norm": 4.436385631561279,
247
  "learning_rate": 3.5757211538461535e-05,
248
- "loss": 0.3878,
249
  "step": 330
250
  },
251
  {
252
  "epoch": 1.1664864864864866,
253
- "grad_norm": 0.6838646531105042,
254
  "learning_rate": 3.5156250000000004e-05,
255
- "loss": 0.3264,
256
  "step": 340
257
  },
258
  {
259
  "epoch": 1.1772972972972973,
260
- "grad_norm": 0.13564491271972656,
261
  "learning_rate": 3.4555288461538466e-05,
262
- "loss": 0.4799,
263
  "step": 350
264
  },
265
  {
266
  "epoch": 1.1881081081081082,
267
- "grad_norm": 7.611846923828125,
268
  "learning_rate": 3.395432692307692e-05,
269
- "loss": 0.3353,
270
  "step": 360
271
  },
272
  {
273
  "epoch": 1.1989189189189189,
274
- "grad_norm": 19.25046730041504,
275
  "learning_rate": 3.335336538461539e-05,
276
- "loss": 0.6819,
277
  "step": 370
278
  },
279
  {
280
  "epoch": 1.201081081081081,
281
- "eval_accuracy": 0.7551867219917012,
282
- "eval_loss": 0.7018738985061646,
283
- "eval_runtime": 356.8316,
284
- "eval_samples_per_second": 0.675,
285
- "eval_steps_per_second": 0.171,
286
  "step": 372
287
  },
288
  {
289
  "epoch": 2.0086486486486486,
290
- "grad_norm": 0.20281045138835907,
291
  "learning_rate": 3.2752403846153846e-05,
292
- "loss": 0.1651,
293
  "step": 380
294
  },
295
  {
296
  "epoch": 2.0194594594594593,
297
- "grad_norm": 28.839569091796875,
298
  "learning_rate": 3.215144230769231e-05,
299
- "loss": 0.3506,
300
  "step": 390
301
  },
302
  {
303
  "epoch": 2.0302702702702704,
304
- "grad_norm": 22.747913360595703,
305
  "learning_rate": 3.155048076923077e-05,
306
- "loss": 0.6242,
307
  "step": 400
308
  },
309
  {
310
  "epoch": 2.041081081081081,
311
- "grad_norm": 29.88939094543457,
312
  "learning_rate": 3.094951923076923e-05,
313
- "loss": 0.4168,
314
  "step": 410
315
  },
316
  {
317
  "epoch": 2.051891891891892,
318
- "grad_norm": 0.13965100049972534,
319
  "learning_rate": 3.0348557692307694e-05,
320
- "loss": 0.0609,
321
  "step": 420
322
  },
323
  {
324
  "epoch": 2.0627027027027025,
325
- "grad_norm": 7.307075500488281,
326
  "learning_rate": 2.974759615384616e-05,
327
- "loss": 0.4065,
328
  "step": 430
329
  },
330
  {
331
  "epoch": 2.0735135135135137,
332
- "grad_norm": 0.021403660997748375,
333
  "learning_rate": 2.9146634615384614e-05,
334
- "loss": 0.3352,
335
  "step": 440
336
  },
337
  {
338
  "epoch": 2.0843243243243244,
339
- "grad_norm": 17.655433654785156,
340
  "learning_rate": 2.854567307692308e-05,
341
- "loss": 0.1129,
342
  "step": 450
343
  },
344
  {
345
  "epoch": 2.095135135135135,
346
- "grad_norm": 0.23121196031570435,
347
  "learning_rate": 2.794471153846154e-05,
348
- "loss": 0.1074,
349
  "step": 460
350
  },
351
  {
352
  "epoch": 2.1059459459459458,
353
- "grad_norm": 0.016081752255558968,
354
  "learning_rate": 2.734375e-05,
355
- "loss": 0.5961,
356
  "step": 470
357
  },
358
  {
359
  "epoch": 2.116756756756757,
360
- "grad_norm": 0.013322114944458008,
361
  "learning_rate": 2.6742788461538466e-05,
362
- "loss": 0.2839,
363
  "step": 480
364
  },
365
  {
366
  "epoch": 2.1275675675675676,
367
- "grad_norm": 4.522771835327148,
368
  "learning_rate": 2.6141826923076925e-05,
369
- "loss": 0.0814,
370
  "step": 490
371
  },
372
  {
373
  "epoch": 2.1383783783783783,
374
- "grad_norm": 0.02786172181367874,
375
  "learning_rate": 2.5540865384615387e-05,
376
- "loss": 0.0448,
377
  "step": 500
378
  },
379
  {
380
  "epoch": 2.149189189189189,
381
- "grad_norm": 16.996612548828125,
382
  "learning_rate": 2.493990384615385e-05,
383
- "loss": 0.3222,
384
  "step": 510
385
  },
386
  {
387
  "epoch": 2.16,
388
- "grad_norm": 1.9226462841033936,
389
  "learning_rate": 2.4338942307692307e-05,
390
- "loss": 0.1569,
391
  "step": 520
392
  },
393
  {
394
  "epoch": 2.170810810810811,
395
- "grad_norm": 1.0920050144195557,
396
  "learning_rate": 2.373798076923077e-05,
397
- "loss": 0.1651,
398
  "step": 530
399
  },
400
  {
401
  "epoch": 2.1816216216216215,
402
- "grad_norm": 0.230145663022995,
403
  "learning_rate": 2.313701923076923e-05,
404
- "loss": 0.2688,
405
  "step": 540
406
  },
407
  {
408
  "epoch": 2.1924324324324322,
409
- "grad_norm": 5.873709201812744,
410
  "learning_rate": 2.2536057692307694e-05,
411
- "loss": 0.5011,
412
  "step": 550
413
  },
414
  {
415
  "epoch": 2.2010810810810812,
416
- "eval_accuracy": 0.8215767634854771,
417
- "eval_loss": 0.5440417528152466,
418
- "eval_runtime": 588.0061,
419
- "eval_samples_per_second": 0.41,
420
- "eval_steps_per_second": 0.104,
421
  "step": 558
422
  },
423
  {
424
  "epoch": 3.002162162162162,
425
- "grad_norm": 12.916172981262207,
426
  "learning_rate": 2.1935096153846156e-05,
427
- "loss": 0.6714,
428
  "step": 560
429
  },
430
  {
431
  "epoch": 3.012972972972973,
432
- "grad_norm": 18.048948287963867,
433
  "learning_rate": 2.1334134615384614e-05,
434
- "loss": 0.1235,
435
  "step": 570
436
  },
437
  {
438
  "epoch": 3.023783783783784,
439
- "grad_norm": 0.9115002751350403,
440
  "learning_rate": 2.073317307692308e-05,
441
- "loss": 0.1749,
442
  "step": 580
443
  },
444
  {
445
  "epoch": 3.0345945945945947,
446
- "grad_norm": 18.67428207397461,
447
  "learning_rate": 2.0132211538461542e-05,
448
- "loss": 0.1547,
449
  "step": 590
450
  },
451
  {
452
  "epoch": 3.0454054054054054,
453
- "grad_norm": 0.07008351385593414,
454
  "learning_rate": 1.953125e-05,
455
- "loss": 0.0246,
456
  "step": 600
457
  },
458
  {
459
  "epoch": 3.056216216216216,
460
- "grad_norm": 0.0067512416280806065,
461
  "learning_rate": 1.8930288461538462e-05,
462
- "loss": 0.0041,
463
  "step": 610
464
  },
465
  {
466
  "epoch": 3.0670270270270272,
467
- "grad_norm": 0.01588040590286255,
468
  "learning_rate": 1.832932692307692e-05,
469
- "loss": 0.1187,
470
  "step": 620
471
  },
472
  {
473
  "epoch": 3.077837837837838,
474
- "grad_norm": 13.167031288146973,
475
  "learning_rate": 1.7728365384615387e-05,
476
- "loss": 0.0238,
477
  "step": 630
478
  },
479
  {
480
  "epoch": 3.0886486486486486,
481
- "grad_norm": 0.06922053545713425,
482
  "learning_rate": 1.712740384615385e-05,
483
- "loss": 0.1063,
484
  "step": 640
485
  },
486
  {
487
  "epoch": 3.0994594594594593,
488
- "grad_norm": 74.04322814941406,
489
  "learning_rate": 1.6526442307692307e-05,
490
- "loss": 0.376,
491
  "step": 650
492
  },
493
  {
494
  "epoch": 3.1102702702702705,
495
- "grad_norm": 32.97873306274414,
496
  "learning_rate": 1.592548076923077e-05,
497
- "loss": 0.4791,
498
  "step": 660
499
  },
500
  {
501
  "epoch": 3.121081081081081,
502
- "grad_norm": 0.11083012819290161,
503
  "learning_rate": 1.532451923076923e-05,
504
- "loss": 0.0166,
505
  "step": 670
506
  },
507
  {
508
  "epoch": 3.131891891891892,
509
- "grad_norm": 0.01234583742916584,
510
  "learning_rate": 1.4723557692307693e-05,
511
- "loss": 0.0195,
512
  "step": 680
513
  },
514
  {
515
  "epoch": 3.1427027027027026,
516
- "grad_norm": 19.750049591064453,
517
  "learning_rate": 1.4122596153846154e-05,
518
- "loss": 0.1691,
519
  "step": 690
520
  },
521
  {
522
  "epoch": 3.1535135135135137,
523
- "grad_norm": 0.6507856249809265,
524
  "learning_rate": 1.3521634615384616e-05,
525
- "loss": 0.2198,
526
  "step": 700
527
  },
528
  {
529
  "epoch": 3.1643243243243244,
530
- "grad_norm": 0.04525623098015785,
531
  "learning_rate": 1.292067307692308e-05,
532
- "loss": 0.3103,
533
  "step": 710
534
  },
535
  {
536
  "epoch": 3.175135135135135,
537
- "grad_norm": 0.009144825860857964,
538
  "learning_rate": 1.231971153846154e-05,
539
- "loss": 0.0903,
540
  "step": 720
541
  },
542
  {
543
  "epoch": 3.185945945945946,
544
- "grad_norm": 6.008044719696045,
545
  "learning_rate": 1.171875e-05,
546
- "loss": 0.2296,
547
  "step": 730
548
  },
549
  {
550
  "epoch": 3.1967567567567565,
551
- "grad_norm": 0.15373368561267853,
552
  "learning_rate": 1.111778846153846e-05,
553
- "loss": 0.3274,
554
  "step": 740
555
  },
556
  {
557
  "epoch": 3.2010810810810812,
558
- "eval_accuracy": 0.8257261410788381,
559
- "eval_loss": 0.683351993560791,
560
- "eval_runtime": 459.2726,
561
- "eval_samples_per_second": 0.525,
562
- "eval_steps_per_second": 0.133,
563
  "step": 744
564
  },
565
  {
566
  "epoch": 4.006486486486486,
567
- "grad_norm": 0.008657727390527725,
568
  "learning_rate": 1.0516826923076924e-05,
569
- "loss": 0.1495,
570
  "step": 750
571
  },
572
  {
573
  "epoch": 4.017297297297297,
574
- "grad_norm": 0.033678218722343445,
575
  "learning_rate": 9.915865384615385e-06,
576
- "loss": 0.0142,
577
  "step": 760
578
  },
579
  {
580
  "epoch": 4.028108108108108,
581
- "grad_norm": 50.353580474853516,
582
  "learning_rate": 9.314903846153847e-06,
583
- "loss": 0.1612,
584
  "step": 770
585
  },
586
  {
587
  "epoch": 4.0389189189189185,
588
- "grad_norm": 0.035760682076215744,
589
  "learning_rate": 8.713942307692307e-06,
590
- "loss": 0.2428,
591
  "step": 780
592
  },
593
  {
594
  "epoch": 4.04972972972973,
595
- "grad_norm": 12.059436798095703,
596
  "learning_rate": 8.112980769230769e-06,
597
- "loss": 0.2061,
598
  "step": 790
599
  },
600
  {
601
  "epoch": 4.060540540540541,
602
- "grad_norm": 0.006958181504160166,
603
  "learning_rate": 7.512019230769231e-06,
604
- "loss": 0.0098,
605
  "step": 800
606
  },
607
  {
608
  "epoch": 4.0713513513513515,
609
- "grad_norm": 0.04333299770951271,
610
  "learning_rate": 6.911057692307693e-06,
611
- "loss": 0.2669,
612
  "step": 810
613
  },
614
  {
615
  "epoch": 4.082162162162162,
616
- "grad_norm": 0.02775268815457821,
617
  "learning_rate": 6.310096153846154e-06,
618
- "loss": 0.0539,
619
  "step": 820
620
  },
621
  {
622
  "epoch": 4.092972972972973,
623
- "grad_norm": 0.48154217004776,
624
  "learning_rate": 5.709134615384616e-06,
625
- "loss": 0.0897,
626
  "step": 830
627
  },
628
  {
629
  "epoch": 4.103783783783784,
630
- "grad_norm": 0.012652811594307423,
631
  "learning_rate": 5.108173076923077e-06,
632
- "loss": 0.0778,
633
  "step": 840
634
  },
635
  {
636
  "epoch": 4.114594594594594,
637
- "grad_norm": 0.006140949670225382,
638
  "learning_rate": 4.507211538461539e-06,
639
- "loss": 0.0017,
640
  "step": 850
641
  },
642
  {
643
  "epoch": 4.125405405405405,
644
- "grad_norm": 11.695226669311523,
645
  "learning_rate": 3.90625e-06,
646
- "loss": 0.0203,
647
  "step": 860
648
  },
649
  {
650
  "epoch": 4.136216216216217,
651
- "grad_norm": 0.015799295157194138,
652
  "learning_rate": 3.3052884615384617e-06,
653
- "loss": 0.0149,
654
  "step": 870
655
  },
656
  {
657
  "epoch": 4.147027027027027,
658
- "grad_norm": 21.965198516845703,
659
  "learning_rate": 2.7043269230769233e-06,
660
- "loss": 0.3756,
661
  "step": 880
662
  },
663
  {
664
  "epoch": 4.157837837837838,
665
- "grad_norm": 0.17932982742786407,
666
  "learning_rate": 2.103365384615385e-06,
667
- "loss": 0.1004,
668
  "step": 890
669
  },
670
  {
671
  "epoch": 4.168648648648649,
672
- "grad_norm": 0.005346647929400206,
673
  "learning_rate": 1.5024038461538464e-06,
674
- "loss": 0.1684,
675
  "step": 900
676
  },
677
  {
678
  "epoch": 4.179459459459459,
679
- "grad_norm": 6.568191051483154,
680
  "learning_rate": 9.014423076923077e-07,
681
- "loss": 0.0342,
682
  "step": 910
683
  },
684
  {
685
  "epoch": 4.19027027027027,
686
- "grad_norm": 0.2238602489233017,
687
  "learning_rate": 3.0048076923076924e-07,
688
- "loss": 0.0083,
689
  "step": 920
690
  },
691
  {
692
  "epoch": 4.195675675675676,
693
- "eval_accuracy": 0.8547717842323651,
694
- "eval_loss": 0.5500421524047852,
695
- "eval_runtime": 452.3168,
696
- "eval_samples_per_second": 0.533,
697
- "eval_steps_per_second": 0.135,
698
  "step": 925
699
  },
700
  {
701
  "epoch": 4.195675675675676,
702
  "step": 925,
703
  "total_flos": 3.2312722580082524e+18,
704
- "train_loss": 0.3789604688234426,
705
- "train_runtime": 19695.7134,
706
- "train_samples_per_second": 0.188,
707
- "train_steps_per_second": 0.047
708
  },
709
  {
710
  "epoch": 4.195675675675676,
711
- "eval_accuracy": 0.9676113360323887,
712
- "eval_loss": 0.1311430186033249,
713
- "eval_runtime": 1467.8713,
714
- "eval_samples_per_second": 0.505,
715
- "eval_steps_per_second": 0.127,
716
  "step": 925
717
  },
718
  {
719
  "epoch": 4.195675675675676,
720
- "eval_accuracy": 0.8269662921348314,
721
- "eval_loss": 0.7350317239761353,
722
- "eval_runtime": 796.0622,
723
- "eval_samples_per_second": 0.559,
724
- "eval_steps_per_second": 0.141,
725
  "step": 925
726
  },
727
  {
728
  "epoch": 4.195675675675676,
729
- "eval_accuracy": 0.8547717842323651,
730
- "eval_loss": 0.5500420928001404,
731
- "eval_runtime": 434.3892,
732
- "eval_samples_per_second": 0.555,
733
- "eval_steps_per_second": 0.14,
734
  "step": 925
735
  }
736
  ],
 
1
  {
2
+ "best_metric": 0.8381742738589212,
3
  "best_model_checkpoint": "videomae-timesformer-surf-analytics\\checkpoint-925",
4
  "epoch": 4.195675675675676,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.010810810810810811,
13
+ "grad_norm": 11.895417213439941,
14
  "learning_rate": 5.376344086021506e-06,
15
+ "loss": 1.5009,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.021621621621621623,
20
+ "grad_norm": 11.518338203430176,
21
  "learning_rate": 1.0752688172043012e-05,
22
+ "loss": 1.1077,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.032432432432432434,
27
+ "grad_norm": 10.138296127319336,
28
  "learning_rate": 1.6129032258064517e-05,
29
+ "loss": 1.3124,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.043243243243243246,
34
+ "grad_norm": 9.477767944335938,
35
  "learning_rate": 2.1505376344086024e-05,
36
+ "loss": 1.0199,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.05405405405405406,
41
+ "grad_norm": 11.046083450317383,
42
  "learning_rate": 2.6881720430107527e-05,
43
+ "loss": 1.038,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.06486486486486487,
48
+ "grad_norm": 12.39089584350586,
49
  "learning_rate": 3.2258064516129034e-05,
50
+ "loss": 0.9406,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.07567567567567568,
55
+ "grad_norm": 12.075498580932617,
56
  "learning_rate": 3.763440860215054e-05,
57
+ "loss": 1.0054,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.08648648648648649,
62
+ "grad_norm": 9.824885368347168,
63
  "learning_rate": 4.301075268817205e-05,
64
+ "loss": 0.7344,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.0972972972972973,
69
+ "grad_norm": 10.267470359802246,
70
  "learning_rate": 4.8387096774193554e-05,
71
+ "loss": 0.6901,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.10810810810810811,
76
+ "grad_norm": 4.5673370361328125,
77
  "learning_rate": 4.957932692307692e-05,
78
+ "loss": 0.4999,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.11891891891891893,
83
+ "grad_norm": 7.499106407165527,
84
  "learning_rate": 4.897836538461539e-05,
85
+ "loss": 0.7291,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.12972972972972974,
90
+ "grad_norm": 13.209514617919922,
91
  "learning_rate": 4.8377403846153846e-05,
92
+ "loss": 0.4908,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.14054054054054055,
97
+ "grad_norm": 27.258346557617188,
98
  "learning_rate": 4.777644230769231e-05,
99
+ "loss": 0.6281,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.15135135135135136,
104
+ "grad_norm": 23.13751220703125,
105
  "learning_rate": 4.717548076923077e-05,
106
+ "loss": 0.7854,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.16216216216216217,
111
+ "grad_norm": 21.831802368164062,
112
  "learning_rate": 4.657451923076923e-05,
113
+ "loss": 0.6653,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.17297297297297298,
118
+ "grad_norm": 3.0441360473632812,
119
  "learning_rate": 4.5973557692307694e-05,
120
+ "loss": 1.2533,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.1837837837837838,
125
+ "grad_norm": 10.554399490356445,
126
  "learning_rate": 4.5372596153846156e-05,
127
+ "loss": 0.546,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.1945945945945946,
132
+ "grad_norm": 14.284193992614746,
133
  "learning_rate": 4.477163461538462e-05,
134
+ "loss": 0.4491,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.20108108108108108,
139
+ "eval_accuracy": 0.7385892116182573,
140
+ "eval_loss": 0.6939307451248169,
141
+ "eval_runtime": 407.9813,
142
+ "eval_samples_per_second": 0.591,
143
+ "eval_steps_per_second": 0.15,
144
  "step": 186
145
  },
146
  {
147
  "epoch": 1.0043243243243243,
148
+ "grad_norm": 11.024110794067383,
149
  "learning_rate": 4.417067307692308e-05,
150
+ "loss": 0.4377,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.0151351351351352,
155
+ "grad_norm": 4.518445014953613,
156
  "learning_rate": 4.3569711538461535e-05,
157
+ "loss": 0.0803,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.025945945945946,
162
+ "grad_norm": 0.42581477761268616,
163
  "learning_rate": 4.2968750000000004e-05,
164
+ "loss": 0.3532,
165
  "step": 210
166
  },
167
  {
168
  "epoch": 1.0367567567567568,
169
+ "grad_norm": 13.55870246887207,
170
  "learning_rate": 4.2367788461538466e-05,
171
+ "loss": 0.7917,
172
  "step": 220
173
  },
174
  {
175
  "epoch": 1.0475675675675675,
176
+ "grad_norm": 13.94046401977539,
177
  "learning_rate": 4.176682692307692e-05,
178
+ "loss": 0.1946,
179
  "step": 230
180
  },
181
  {
182
  "epoch": 1.0583783783783784,
183
+ "grad_norm": 24.760360717773438,
184
  "learning_rate": 4.116586538461539e-05,
185
+ "loss": 0.9298,
186
  "step": 240
187
  },
188
  {
189
  "epoch": 1.0691891891891891,
190
+ "grad_norm": 7.651761531829834,
191
  "learning_rate": 4.0564903846153846e-05,
192
+ "loss": 0.7205,
193
  "step": 250
194
  },
195
  {
196
  "epoch": 1.08,
197
+ "grad_norm": 16.21826171875,
198
  "learning_rate": 3.996394230769231e-05,
199
+ "loss": 0.6958,
200
  "step": 260
201
  },
202
  {
203
  "epoch": 1.0908108108108108,
204
+ "grad_norm": 16.531909942626953,
205
  "learning_rate": 3.936298076923077e-05,
206
+ "loss": 0.2937,
207
  "step": 270
208
  },
209
  {
210
  "epoch": 1.1016216216216217,
211
+ "grad_norm": 16.31216049194336,
212
  "learning_rate": 3.876201923076923e-05,
213
+ "loss": 0.6104,
214
  "step": 280
215
  },
216
  {
217
  "epoch": 1.1124324324324324,
218
+ "grad_norm": 8.48189640045166,
219
  "learning_rate": 3.8161057692307694e-05,
220
+ "loss": 0.4254,
221
  "step": 290
222
  },
223
  {
224
  "epoch": 1.1232432432432433,
225
+ "grad_norm": 29.133272171020508,
226
  "learning_rate": 3.7560096153846156e-05,
227
+ "loss": 0.262,
228
  "step": 300
229
  },
230
  {
231
  "epoch": 1.134054054054054,
232
+ "grad_norm": 20.123014450073242,
233
  "learning_rate": 3.695913461538462e-05,
234
+ "loss": 0.4921,
235
  "step": 310
236
  },
237
  {
238
  "epoch": 1.144864864864865,
239
+ "grad_norm": 9.895979881286621,
240
  "learning_rate": 3.635817307692308e-05,
241
+ "loss": 0.2059,
242
  "step": 320
243
  },
244
  {
245
  "epoch": 1.1556756756756756,
246
+ "grad_norm": 11.974223136901855,
247
  "learning_rate": 3.5757211538461535e-05,
248
+ "loss": 0.2591,
249
  "step": 330
250
  },
251
  {
252
  "epoch": 1.1664864864864866,
253
+ "grad_norm": 0.1094137504696846,
254
  "learning_rate": 3.5156250000000004e-05,
255
+ "loss": 0.437,
256
  "step": 340
257
  },
258
  {
259
  "epoch": 1.1772972972972973,
260
+ "grad_norm": 0.12011051177978516,
261
  "learning_rate": 3.4555288461538466e-05,
262
+ "loss": 0.5299,
263
  "step": 350
264
  },
265
  {
266
  "epoch": 1.1881081081081082,
267
+ "grad_norm": 0.32698747515678406,
268
  "learning_rate": 3.395432692307692e-05,
269
+ "loss": 0.1667,
270
  "step": 360
271
  },
272
  {
273
  "epoch": 1.1989189189189189,
274
+ "grad_norm": 2.423952102661133,
275
  "learning_rate": 3.335336538461539e-05,
276
+ "loss": 0.5627,
277
  "step": 370
278
  },
279
  {
280
  "epoch": 1.201081081081081,
281
+ "eval_accuracy": 0.7759336099585062,
282
+ "eval_loss": 0.6805610060691833,
283
+ "eval_runtime": 399.3517,
284
+ "eval_samples_per_second": 0.603,
285
+ "eval_steps_per_second": 0.153,
286
  "step": 372
287
  },
288
  {
289
  "epoch": 2.0086486486486486,
290
+ "grad_norm": 0.037202008068561554,
291
  "learning_rate": 3.2752403846153846e-05,
292
+ "loss": 0.0415,
293
  "step": 380
294
  },
295
  {
296
  "epoch": 2.0194594594594593,
297
+ "grad_norm": 23.98035430908203,
298
  "learning_rate": 3.215144230769231e-05,
299
+ "loss": 0.3379,
300
  "step": 390
301
  },
302
  {
303
  "epoch": 2.0302702702702704,
304
+ "grad_norm": 31.957252502441406,
305
  "learning_rate": 3.155048076923077e-05,
306
+ "loss": 0.357,
307
  "step": 400
308
  },
309
  {
310
  "epoch": 2.041081081081081,
311
+ "grad_norm": 1.1118696928024292,
312
  "learning_rate": 3.094951923076923e-05,
313
+ "loss": 0.1775,
314
  "step": 410
315
  },
316
  {
317
  "epoch": 2.051891891891892,
318
+ "grad_norm": 0.05329513177275658,
319
  "learning_rate": 3.0348557692307694e-05,
320
+ "loss": 0.0175,
321
  "step": 420
322
  },
323
  {
324
  "epoch": 2.0627027027027025,
325
+ "grad_norm": 32.40390396118164,
326
  "learning_rate": 2.974759615384616e-05,
327
+ "loss": 0.4569,
328
  "step": 430
329
  },
330
  {
331
  "epoch": 2.0735135135135137,
332
+ "grad_norm": 0.15056586265563965,
333
  "learning_rate": 2.9146634615384614e-05,
334
+ "loss": 0.3911,
335
  "step": 440
336
  },
337
  {
338
  "epoch": 2.0843243243243244,
339
+ "grad_norm": 0.2210923731327057,
340
  "learning_rate": 2.854567307692308e-05,
341
+ "loss": 0.112,
342
  "step": 450
343
  },
344
  {
345
  "epoch": 2.095135135135135,
346
+ "grad_norm": 0.9941416382789612,
347
  "learning_rate": 2.794471153846154e-05,
348
+ "loss": 0.3037,
349
  "step": 460
350
  },
351
  {
352
  "epoch": 2.1059459459459458,
353
+ "grad_norm": 0.035190433263778687,
354
  "learning_rate": 2.734375e-05,
355
+ "loss": 0.6371,
356
  "step": 470
357
  },
358
  {
359
  "epoch": 2.116756756756757,
360
+ "grad_norm": 0.04186534136533737,
361
  "learning_rate": 2.6742788461538466e-05,
362
+ "loss": 0.4244,
363
  "step": 480
364
  },
365
  {
366
  "epoch": 2.1275675675675676,
367
+ "grad_norm": 0.5409103631973267,
368
  "learning_rate": 2.6141826923076925e-05,
369
+ "loss": 0.311,
370
  "step": 490
371
  },
372
  {
373
  "epoch": 2.1383783783783783,
374
+ "grad_norm": 0.04897398501634598,
375
  "learning_rate": 2.5540865384615387e-05,
376
+ "loss": 0.0555,
377
  "step": 500
378
  },
379
  {
380
  "epoch": 2.149189189189189,
381
+ "grad_norm": 16.756502151489258,
382
  "learning_rate": 2.493990384615385e-05,
383
+ "loss": 0.264,
384
  "step": 510
385
  },
386
  {
387
  "epoch": 2.16,
388
+ "grad_norm": 1.5812554359436035,
389
  "learning_rate": 2.4338942307692307e-05,
390
+ "loss": 0.1869,
391
  "step": 520
392
  },
393
  {
394
  "epoch": 2.170810810810811,
395
+ "grad_norm": 0.20836937427520752,
396
  "learning_rate": 2.373798076923077e-05,
397
+ "loss": 0.2804,
398
  "step": 530
399
  },
400
  {
401
  "epoch": 2.1816216216216215,
402
+ "grad_norm": 0.14942559599876404,
403
  "learning_rate": 2.313701923076923e-05,
404
+ "loss": 0.2552,
405
  "step": 540
406
  },
407
  {
408
  "epoch": 2.1924324324324322,
409
+ "grad_norm": 0.09908430278301239,
410
  "learning_rate": 2.2536057692307694e-05,
411
+ "loss": 0.5189,
412
  "step": 550
413
  },
414
  {
415
  "epoch": 2.2010810810810812,
416
+ "eval_accuracy": 0.8174273858921162,
417
+ "eval_loss": 0.651021420955658,
418
+ "eval_runtime": 406.013,
419
+ "eval_samples_per_second": 0.594,
420
+ "eval_steps_per_second": 0.15,
421
  "step": 558
422
  },
423
  {
424
  "epoch": 3.002162162162162,
425
+ "grad_norm": 15.29819107055664,
426
  "learning_rate": 2.1935096153846156e-05,
427
+ "loss": 0.7952,
428
  "step": 560
429
  },
430
  {
431
  "epoch": 3.012972972972973,
432
+ "grad_norm": 7.53931188583374,
433
  "learning_rate": 2.1334134615384614e-05,
434
+ "loss": 0.0482,
435
  "step": 570
436
  },
437
  {
438
  "epoch": 3.023783783783784,
439
+ "grad_norm": 0.01849238947033882,
440
  "learning_rate": 2.073317307692308e-05,
441
+ "loss": 0.0809,
442
  "step": 580
443
  },
444
  {
445
  "epoch": 3.0345945945945947,
446
+ "grad_norm": 28.94001007080078,
447
  "learning_rate": 2.0132211538461542e-05,
448
+ "loss": 0.0708,
449
  "step": 590
450
  },
451
  {
452
  "epoch": 3.0454054054054054,
453
+ "grad_norm": 0.031998779624700546,
454
  "learning_rate": 1.953125e-05,
455
+ "loss": 0.0426,
456
  "step": 600
457
  },
458
  {
459
  "epoch": 3.056216216216216,
460
+ "grad_norm": 6.872684001922607,
461
  "learning_rate": 1.8930288461538462e-05,
462
+ "loss": 0.1062,
463
  "step": 610
464
  },
465
  {
466
  "epoch": 3.0670270270270272,
467
+ "grad_norm": 0.02178012765944004,
468
  "learning_rate": 1.832932692307692e-05,
469
+ "loss": 0.1908,
470
  "step": 620
471
  },
472
  {
473
  "epoch": 3.077837837837838,
474
+ "grad_norm": 12.06849193572998,
475
  "learning_rate": 1.7728365384615387e-05,
476
+ "loss": 0.0114,
477
  "step": 630
478
  },
479
  {
480
  "epoch": 3.0886486486486486,
481
+ "grad_norm": 0.18928392231464386,
482
  "learning_rate": 1.712740384615385e-05,
483
+ "loss": 0.1763,
484
  "step": 640
485
  },
486
  {
487
  "epoch": 3.0994594594594593,
488
+ "grad_norm": 10.532954216003418,
489
  "learning_rate": 1.6526442307692307e-05,
490
+ "loss": 0.3634,
491
  "step": 650
492
  },
493
  {
494
  "epoch": 3.1102702702702705,
495
+ "grad_norm": 18.90543556213379,
496
  "learning_rate": 1.592548076923077e-05,
497
+ "loss": 0.728,
498
  "step": 660
499
  },
500
  {
501
  "epoch": 3.121081081081081,
502
+ "grad_norm": 0.35980573296546936,
503
  "learning_rate": 1.532451923076923e-05,
504
+ "loss": 0.144,
505
  "step": 670
506
  },
507
  {
508
  "epoch": 3.131891891891892,
509
+ "grad_norm": 0.03495261073112488,
510
  "learning_rate": 1.4723557692307693e-05,
511
+ "loss": 0.0163,
512
  "step": 680
513
  },
514
  {
515
  "epoch": 3.1427027027027026,
516
+ "grad_norm": 24.22405242919922,
517
  "learning_rate": 1.4122596153846154e-05,
518
+ "loss": 0.1584,
519
  "step": 690
520
  },
521
  {
522
  "epoch": 3.1535135135135137,
523
+ "grad_norm": 0.026546325534582138,
524
  "learning_rate": 1.3521634615384616e-05,
525
+ "loss": 0.3465,
526
  "step": 700
527
  },
528
  {
529
  "epoch": 3.1643243243243244,
530
+ "grad_norm": 0.43370339274406433,
531
  "learning_rate": 1.292067307692308e-05,
532
+ "loss": 0.3739,
533
  "step": 710
534
  },
535
  {
536
  "epoch": 3.175135135135135,
537
+ "grad_norm": 0.02699950709939003,
538
  "learning_rate": 1.231971153846154e-05,
539
+ "loss": 0.1644,
540
  "step": 720
541
  },
542
  {
543
  "epoch": 3.185945945945946,
544
+ "grad_norm": 2.032027006149292,
545
  "learning_rate": 1.171875e-05,
546
+ "loss": 0.1225,
547
  "step": 730
548
  },
549
  {
550
  "epoch": 3.1967567567567565,
551
+ "grad_norm": 0.006064319983124733,
552
  "learning_rate": 1.111778846153846e-05,
553
+ "loss": 0.2503,
554
  "step": 740
555
  },
556
  {
557
  "epoch": 3.2010810810810812,
558
+ "eval_accuracy": 0.8174273858921162,
559
+ "eval_loss": 0.6731572151184082,
560
+ "eval_runtime": 400.1385,
561
+ "eval_samples_per_second": 0.602,
562
+ "eval_steps_per_second": 0.152,
563
  "step": 744
564
  },
565
  {
566
  "epoch": 4.006486486486486,
567
+ "grad_norm": 0.4040788412094116,
568
  "learning_rate": 1.0516826923076924e-05,
569
+ "loss": 0.2375,
570
  "step": 750
571
  },
572
  {
573
  "epoch": 4.017297297297297,
574
+ "grad_norm": 11.23088264465332,
575
  "learning_rate": 9.915865384615385e-06,
576
+ "loss": 0.1009,
577
  "step": 760
578
  },
579
  {
580
  "epoch": 4.028108108108108,
581
+ "grad_norm": 44.4477424621582,
582
  "learning_rate": 9.314903846153847e-06,
583
+ "loss": 0.1268,
584
  "step": 770
585
  },
586
  {
587
  "epoch": 4.0389189189189185,
588
+ "grad_norm": 0.01056050043553114,
589
  "learning_rate": 8.713942307692307e-06,
590
+ "loss": 0.0738,
591
  "step": 780
592
  },
593
  {
594
  "epoch": 4.04972972972973,
595
+ "grad_norm": 14.111398696899414,
596
  "learning_rate": 8.112980769230769e-06,
597
+ "loss": 0.2126,
598
  "step": 790
599
  },
600
  {
601
  "epoch": 4.060540540540541,
602
+ "grad_norm": 0.014713864773511887,
603
  "learning_rate": 7.512019230769231e-06,
604
+ "loss": 0.0011,
605
  "step": 800
606
  },
607
  {
608
  "epoch": 4.0713513513513515,
609
+ "grad_norm": 0.010851687751710415,
610
  "learning_rate": 6.911057692307693e-06,
611
+ "loss": 0.1786,
612
  "step": 810
613
  },
614
  {
615
  "epoch": 4.082162162162162,
616
+ "grad_norm": 0.050808586180210114,
617
  "learning_rate": 6.310096153846154e-06,
618
+ "loss": 0.0783,
619
  "step": 820
620
  },
621
  {
622
  "epoch": 4.092972972972973,
623
+ "grad_norm": 0.037915121763944626,
624
  "learning_rate": 5.709134615384616e-06,
625
+ "loss": 0.2292,
626
  "step": 830
627
  },
628
  {
629
  "epoch": 4.103783783783784,
630
+ "grad_norm": 2.1745355129241943,
631
  "learning_rate": 5.108173076923077e-06,
632
+ "loss": 0.1868,
633
  "step": 840
634
  },
635
  {
636
  "epoch": 4.114594594594594,
637
+ "grad_norm": 0.020182184875011444,
638
  "learning_rate": 4.507211538461539e-06,
639
+ "loss": 0.0016,
640
  "step": 850
641
  },
642
  {
643
  "epoch": 4.125405405405405,
644
+ "grad_norm": 23.419614791870117,
645
  "learning_rate": 3.90625e-06,
646
+ "loss": 0.0688,
647
  "step": 860
648
  },
649
  {
650
  "epoch": 4.136216216216217,
651
+ "grad_norm": 0.5133500695228577,
652
  "learning_rate": 3.3052884615384617e-06,
653
+ "loss": 0.0782,
654
  "step": 870
655
  },
656
  {
657
  "epoch": 4.147027027027027,
658
+ "grad_norm": 1.0855499505996704,
659
  "learning_rate": 2.7043269230769233e-06,
660
+ "loss": 0.0486,
661
  "step": 880
662
  },
663
  {
664
  "epoch": 4.157837837837838,
665
+ "grad_norm": 0.05244865640997887,
666
  "learning_rate": 2.103365384615385e-06,
667
+ "loss": 0.0217,
668
  "step": 890
669
  },
670
  {
671
  "epoch": 4.168648648648649,
672
+ "grad_norm": 0.013233544304966927,
673
  "learning_rate": 1.5024038461538464e-06,
674
+ "loss": 0.2434,
675
  "step": 900
676
  },
677
  {
678
  "epoch": 4.179459459459459,
679
+ "grad_norm": 12.086763381958008,
680
  "learning_rate": 9.014423076923077e-07,
681
+ "loss": 0.0501,
682
  "step": 910
683
  },
684
  {
685
  "epoch": 4.19027027027027,
686
+ "grad_norm": 0.0869203507900238,
687
  "learning_rate": 3.0048076923076924e-07,
688
+ "loss": 0.0159,
689
  "step": 920
690
  },
691
  {
692
  "epoch": 4.195675675675676,
693
+ "eval_accuracy": 0.8381742738589212,
694
+ "eval_loss": 0.6192311644554138,
695
+ "eval_runtime": 401.9033,
696
+ "eval_samples_per_second": 0.6,
697
+ "eval_steps_per_second": 0.152,
698
  "step": 925
699
  },
700
  {
701
  "epoch": 4.195675675675676,
702
  "step": 925,
703
  "total_flos": 3.2312722580082524e+18,
704
+ "train_loss": 0.3796664474924674,
705
+ "train_runtime": 18773.5619,
706
+ "train_samples_per_second": 0.197,
707
+ "train_steps_per_second": 0.049
708
  },
709
  {
710
  "epoch": 4.195675675675676,
711
+ "eval_accuracy": 0.9770580296896086,
712
+ "eval_loss": 0.11411414295434952,
713
+ "eval_runtime": 1291.965,
714
+ "eval_samples_per_second": 0.574,
715
+ "eval_steps_per_second": 0.144,
716
  "step": 925
717
  },
718
  {
719
  "epoch": 4.195675675675676,
720
+ "eval_accuracy": 0.8134831460674158,
721
+ "eval_loss": 0.7141955494880676,
722
+ "eval_runtime": 720.0647,
723
+ "eval_samples_per_second": 0.618,
724
+ "eval_steps_per_second": 0.156,
725
  "step": 925
726
  },
727
  {
728
  "epoch": 4.195675675675676,
729
+ "eval_accuracy": 0.8381742738589212,
730
+ "eval_loss": 0.6192311644554138,
731
+ "eval_runtime": 391.274,
732
+ "eval_samples_per_second": 0.616,
733
+ "eval_steps_per_second": 0.156,
734
  "step": 925
735
  }
736
  ],
val_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.195675675675676,
3
- "eval_accuracy": 0.8547717842323651,
4
- "eval_loss": 0.5500420928001404,
5
- "eval_runtime": 434.3892,
6
- "eval_samples_per_second": 0.555,
7
- "eval_steps_per_second": 0.14
8
  }
 
1
  {
2
  "epoch": 4.195675675675676,
3
+ "eval_accuracy": 0.8381742738589212,
4
+ "eval_loss": 0.6192311644554138,
5
+ "eval_runtime": 391.274,
6
+ "eval_samples_per_second": 0.616,
7
+ "eval_steps_per_second": 0.156
8
  }