miladfa7 commited on
Commit
a08ad0d
·
verified ·
1 Parent(s): c45fb1b

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.1,
3
- "eval_accuracy": 0.7073170731707317,
4
- "eval_loss": 1.3381569385528564,
5
- "eval_runtime": 5.7056,
6
- "eval_samples_per_second": 14.372,
7
- "eval_steps_per_second": 3.681
8
  }
 
1
  {
2
  "epoch": 9.1,
3
+ "eval_accuracy": 0.6951219512195121,
4
+ "eval_loss": 1.2568752765655518,
5
+ "eval_runtime": 5.6457,
6
+ "eval_samples_per_second": 14.524,
7
+ "eval_steps_per_second": 3.72
8
  }
confusion_matrix.jpg CHANGED
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07380e681d431f424eed6cfd1df25fd4a09f163e41c9122585f79cf3c0fb0bde
3
  size 344952716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a99ea9829e1200d06ae401d3bf8e36295f08492aa1c756f8551170726883efa
3
  size 344952716
runs/Aug10_21-42-01_prod3/events.out.tfevents.1754835019.prod3.218391.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18585cb323fa152689aa6129ba1bcbced696133a5732cab2eb1ab767df83168a
3
+ size 411
runs/Aug11_12-54-44_prod3/events.out.tfevents.1754888092.prod3.683740.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae7b3495745bbb1dc24c763deeed8fe96b91e8e61e5846610e507c01212f2b93
3
+ size 8376
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.1,
3
- "eval_accuracy": 0.7073170731707317,
4
- "eval_loss": 1.3381569385528564,
5
- "eval_runtime": 5.7056,
6
- "eval_samples_per_second": 14.372,
7
- "eval_steps_per_second": 3.681
8
  }
 
1
  {
2
  "epoch": 9.1,
3
+ "eval_accuracy": 0.6951219512195121,
4
+ "eval_loss": 1.2568752765655518,
5
+ "eval_runtime": 5.6457,
6
+ "eval_samples_per_second": 14.524,
7
+ "eval_steps_per_second": 3.72
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 575,
3
- "best_metric": 0.7073170731707317,
4
- "best_model_checkpoint": "./Models/Matcha_clips_224_fintuned_4/checkpoint-575",
5
  "epoch": 9.1,
6
  "eval_steps": 500,
7
  "global_step": 1150,
@@ -11,915 +11,915 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.008695652173913044,
14
- "grad_norm": 6.172690391540527,
15
  "learning_rate": 3.91304347826087e-06,
16
- "loss": 1.9556,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.017391304347826087,
21
- "grad_norm": 5.285815238952637,
22
  "learning_rate": 8.26086956521739e-06,
23
- "loss": 1.9446,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.02608695652173913,
28
- "grad_norm": 7.100526809692383,
29
  "learning_rate": 1.2608695652173914e-05,
30
- "loss": 1.8525,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.034782608695652174,
35
- "grad_norm": 7.057590961456299,
36
  "learning_rate": 1.6956521739130433e-05,
37
- "loss": 1.8414,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.043478260869565216,
42
- "grad_norm": 6.360028266906738,
43
  "learning_rate": 2.1304347826086958e-05,
44
- "loss": 1.6835,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.05217391304347826,
49
- "grad_norm": 6.762235164642334,
50
  "learning_rate": 2.5652173913043483e-05,
51
- "loss": 1.8069,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.06086956521739131,
56
- "grad_norm": 9.110674858093262,
57
  "learning_rate": 3e-05,
58
- "loss": 1.6742,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.06956521739130435,
63
- "grad_norm": 5.7140631675720215,
64
  "learning_rate": 3.4347826086956526e-05,
65
- "loss": 1.675,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.0782608695652174,
70
- "grad_norm": 7.441013336181641,
71
  "learning_rate": 3.869565217391305e-05,
72
- "loss": 1.7402,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.08695652173913043,
77
- "grad_norm": 8.591211318969727,
78
  "learning_rate": 4.304347826086957e-05,
79
- "loss": 1.6256,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.09565217391304348,
84
- "grad_norm": 11.78553581237793,
85
  "learning_rate": 4.739130434782609e-05,
86
- "loss": 1.4198,
87
  "step": 110
88
  },
89
  {
90
  "epoch": 0.1,
91
- "eval_accuracy": 0.4634146341463415,
92
- "eval_loss": 1.4963948726654053,
93
- "eval_runtime": 6.2391,
94
- "eval_samples_per_second": 13.143,
95
- "eval_steps_per_second": 3.366,
96
  "step": 115
97
  },
98
  {
99
  "epoch": 1.0043478260869565,
100
- "grad_norm": 9.049060821533203,
101
  "learning_rate": 4.980676328502415e-05,
102
- "loss": 1.3079,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 1.0130434782608695,
107
- "grad_norm": 10.772295951843262,
108
  "learning_rate": 4.932367149758454e-05,
109
- "loss": 1.0031,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0217391304347827,
114
- "grad_norm": 20.79416275024414,
115
  "learning_rate": 4.884057971014493e-05,
116
- "loss": 0.9924,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.0304347826086957,
121
- "grad_norm": 6.840089321136475,
122
  "learning_rate": 4.8357487922705316e-05,
123
- "loss": 0.7422,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.0391304347826087,
128
- "grad_norm": 9.69269847869873,
129
  "learning_rate": 4.7874396135265706e-05,
130
- "loss": 1.0388,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.0478260869565217,
135
- "grad_norm": 9.885075569152832,
136
  "learning_rate": 4.739130434782609e-05,
137
- "loss": 0.9266,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.0565217391304347,
142
- "grad_norm": 16.479467391967773,
143
  "learning_rate": 4.690821256038648e-05,
144
- "loss": 0.8315,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.065217391304348,
149
- "grad_norm": 22.804908752441406,
150
  "learning_rate": 4.642512077294686e-05,
151
- "loss": 0.736,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.0739130434782609,
156
- "grad_norm": 12.778817176818848,
157
  "learning_rate": 4.594202898550725e-05,
158
- "loss": 0.8893,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.0826086956521739,
163
- "grad_norm": 18.733671188354492,
164
  "learning_rate": 4.545893719806764e-05,
165
- "loss": 1.3588,
166
  "step": 210
167
  },
168
  {
169
  "epoch": 1.0913043478260869,
170
- "grad_norm": 19.088119506835938,
171
  "learning_rate": 4.497584541062802e-05,
172
- "loss": 0.7065,
173
  "step": 220
174
  },
175
  {
176
  "epoch": 1.1,
177
- "grad_norm": 10.45570182800293,
178
  "learning_rate": 4.449275362318841e-05,
179
- "loss": 0.6674,
180
  "step": 230
181
  },
182
  {
183
  "epoch": 1.1,
184
- "eval_accuracy": 0.6585365853658537,
185
- "eval_loss": 1.0704329013824463,
186
- "eval_runtime": 6.1407,
187
- "eval_samples_per_second": 13.354,
188
- "eval_steps_per_second": 3.42,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 2.008695652173913,
193
- "grad_norm": 7.579151153564453,
194
  "learning_rate": 4.4009661835748794e-05,
195
- "loss": 0.2858,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 2.017391304347826,
200
- "grad_norm": 9.255645751953125,
201
  "learning_rate": 4.352657004830918e-05,
202
- "loss": 0.5303,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 2.026086956521739,
207
- "grad_norm": 1.6530203819274902,
208
  "learning_rate": 4.304347826086957e-05,
209
- "loss": 0.2094,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.034782608695652,
214
- "grad_norm": 13.71268367767334,
215
  "learning_rate": 4.256038647342995e-05,
216
- "loss": 0.2021,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.0434782608695654,
221
- "grad_norm": 2.5825889110565186,
222
  "learning_rate": 4.207729468599034e-05,
223
- "loss": 0.1329,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.0521739130434784,
228
- "grad_norm": 1.7412315607070923,
229
  "learning_rate": 4.1594202898550726e-05,
230
- "loss": 0.1377,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.0608695652173914,
235
- "grad_norm": 18.217609405517578,
236
  "learning_rate": 4.111111111111111e-05,
237
- "loss": 0.2466,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.0695652173913044,
242
- "grad_norm": 7.633990287780762,
243
  "learning_rate": 4.06280193236715e-05,
244
- "loss": 0.3553,
245
  "step": 310
246
  },
247
  {
248
  "epoch": 2.0782608695652174,
249
- "grad_norm": 19.997325897216797,
250
  "learning_rate": 4.014492753623188e-05,
251
- "loss": 0.3189,
252
  "step": 320
253
  },
254
  {
255
  "epoch": 2.0869565217391304,
256
- "grad_norm": 1.3608593940734863,
257
  "learning_rate": 3.966183574879227e-05,
258
- "loss": 0.1477,
259
  "step": 330
260
  },
261
  {
262
  "epoch": 2.0956521739130434,
263
- "grad_norm": 6.2525739669799805,
264
  "learning_rate": 3.9178743961352657e-05,
265
- "loss": 0.4521,
266
  "step": 340
267
  },
268
  {
269
  "epoch": 2.1,
270
- "eval_accuracy": 0.6341463414634146,
271
- "eval_loss": 0.9644566774368286,
272
- "eval_runtime": 6.2182,
273
- "eval_samples_per_second": 13.187,
274
- "eval_steps_per_second": 3.377,
275
  "step": 345
276
  },
277
  {
278
  "epoch": 3.0043478260869567,
279
- "grad_norm": 0.2062215656042099,
280
  "learning_rate": 3.869565217391305e-05,
281
- "loss": 0.0714,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 3.0130434782608697,
286
- "grad_norm": 0.07909725606441498,
287
  "learning_rate": 3.821256038647344e-05,
288
- "loss": 0.0455,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 3.0217391304347827,
293
- "grad_norm": 2.4341115951538086,
294
  "learning_rate": 3.772946859903382e-05,
295
- "loss": 0.0473,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 3.0304347826086957,
300
- "grad_norm": 0.42377185821533203,
301
  "learning_rate": 3.7246376811594204e-05,
302
- "loss": 0.111,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 3.0391304347826087,
307
- "grad_norm": 0.19808898866176605,
308
  "learning_rate": 3.6763285024154594e-05,
309
- "loss": 0.0683,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.0478260869565217,
314
- "grad_norm": 0.8478032946586609,
315
  "learning_rate": 3.628019323671498e-05,
316
- "loss": 0.0441,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.0565217391304347,
321
- "grad_norm": 0.15200255811214447,
322
  "learning_rate": 3.579710144927537e-05,
323
- "loss": 0.0142,
324
  "step": 410
325
  },
326
  {
327
  "epoch": 3.0652173913043477,
328
- "grad_norm": 0.1159711629152298,
329
  "learning_rate": 3.531400966183575e-05,
330
- "loss": 0.0203,
331
  "step": 420
332
  },
333
  {
334
  "epoch": 3.0739130434782607,
335
- "grad_norm": 0.1384093165397644,
336
  "learning_rate": 3.4830917874396135e-05,
337
- "loss": 0.1054,
338
  "step": 430
339
  },
340
  {
341
  "epoch": 3.082608695652174,
342
- "grad_norm": 0.5738388895988464,
343
  "learning_rate": 3.4347826086956526e-05,
344
- "loss": 0.0707,
345
  "step": 440
346
  },
347
  {
348
  "epoch": 3.091304347826087,
349
- "grad_norm": 0.16567471623420715,
350
  "learning_rate": 3.386473429951691e-05,
351
- "loss": 0.1002,
352
  "step": 450
353
  },
354
  {
355
  "epoch": 3.1,
356
- "grad_norm": 0.6657196283340454,
357
  "learning_rate": 3.338164251207729e-05,
358
- "loss": 0.1047,
359
  "step": 460
360
  },
361
  {
362
  "epoch": 3.1,
363
- "eval_accuracy": 0.6463414634146342,
364
- "eval_loss": 1.2479301691055298,
365
- "eval_runtime": 6.3163,
366
- "eval_samples_per_second": 12.982,
367
- "eval_steps_per_second": 3.325,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 4.008695652173913,
372
- "grad_norm": 2.4936015605926514,
373
  "learning_rate": 3.289855072463768e-05,
374
- "loss": 0.0126,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 4.017391304347826,
379
- "grad_norm": 25.129255294799805,
380
  "learning_rate": 3.2415458937198066e-05,
381
- "loss": 0.0839,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 4.026086956521739,
386
- "grad_norm": 0.05726484954357147,
387
  "learning_rate": 3.1932367149758457e-05,
388
- "loss": 0.0028,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 4.034782608695652,
393
- "grad_norm": 0.034710388630628586,
394
  "learning_rate": 3.144927536231884e-05,
395
- "loss": 0.0035,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 4.043478260869565,
400
- "grad_norm": 0.04727836325764656,
401
  "learning_rate": 3.0966183574879224e-05,
402
- "loss": 0.0505,
403
  "step": 510
404
  },
405
  {
406
  "epoch": 4.052173913043478,
407
- "grad_norm": 48.47065353393555,
408
  "learning_rate": 3.0483091787439617e-05,
409
- "loss": 0.1449,
410
  "step": 520
411
  },
412
  {
413
  "epoch": 4.060869565217391,
414
- "grad_norm": 0.09721066057682037,
415
  "learning_rate": 3e-05,
416
- "loss": 0.0098,
417
  "step": 530
418
  },
419
  {
420
  "epoch": 4.069565217391304,
421
- "grad_norm": 0.3183492422103882,
422
  "learning_rate": 2.951690821256039e-05,
423
- "loss": 0.0041,
424
  "step": 540
425
  },
426
  {
427
  "epoch": 4.078260869565217,
428
- "grad_norm": 0.02137056551873684,
429
  "learning_rate": 2.9033816425120775e-05,
430
- "loss": 0.0696,
431
  "step": 550
432
  },
433
  {
434
  "epoch": 4.086956521739131,
435
- "grad_norm": 0.04266425967216492,
436
  "learning_rate": 2.8550724637681158e-05,
437
- "loss": 0.004,
438
  "step": 560
439
  },
440
  {
441
  "epoch": 4.095652173913043,
442
- "grad_norm": 0.04853034391999245,
443
  "learning_rate": 2.806763285024155e-05,
444
- "loss": 0.0315,
445
  "step": 570
446
  },
447
  {
448
  "epoch": 4.1,
449
- "eval_accuracy": 0.7073170731707317,
450
- "eval_loss": 1.338157057762146,
451
- "eval_runtime": 6.3184,
452
- "eval_samples_per_second": 12.978,
453
- "eval_steps_per_second": 3.324,
454
  "step": 575
455
  },
456
  {
457
  "epoch": 5.004347826086956,
458
- "grad_norm": 0.028169039636850357,
459
  "learning_rate": 2.7584541062801932e-05,
460
- "loss": 0.0025,
461
  "step": 580
462
  },
463
  {
464
  "epoch": 5.01304347826087,
465
- "grad_norm": 0.04628513753414154,
466
  "learning_rate": 2.7101449275362322e-05,
467
- "loss": 0.0033,
468
  "step": 590
469
  },
470
  {
471
  "epoch": 5.021739130434782,
472
- "grad_norm": 0.19218483567237854,
473
  "learning_rate": 2.6618357487922706e-05,
474
- "loss": 0.0018,
475
  "step": 600
476
  },
477
  {
478
  "epoch": 5.030434782608696,
479
- "grad_norm": 0.04920341819524765,
480
  "learning_rate": 2.6135265700483093e-05,
481
- "loss": 0.0011,
482
  "step": 610
483
  },
484
  {
485
  "epoch": 5.039130434782609,
486
- "grad_norm": 0.015601912513375282,
487
  "learning_rate": 2.5652173913043483e-05,
488
- "loss": 0.0029,
489
  "step": 620
490
  },
491
  {
492
  "epoch": 5.047826086956522,
493
- "grad_norm": 0.041672300547361374,
494
  "learning_rate": 2.5169082125603866e-05,
495
- "loss": 0.0095,
496
  "step": 630
497
  },
498
  {
499
  "epoch": 5.056521739130435,
500
- "grad_norm": 0.031528741121292114,
501
  "learning_rate": 2.4685990338164253e-05,
502
- "loss": 0.0053,
503
  "step": 640
504
  },
505
  {
506
  "epoch": 5.065217391304348,
507
- "grad_norm": 0.06605325639247894,
508
  "learning_rate": 2.420289855072464e-05,
509
- "loss": 0.0013,
510
  "step": 650
511
  },
512
  {
513
  "epoch": 5.073913043478261,
514
- "grad_norm": 0.05129764974117279,
515
  "learning_rate": 2.3719806763285024e-05,
516
- "loss": 0.0011,
517
  "step": 660
518
  },
519
  {
520
  "epoch": 5.082608695652174,
521
- "grad_norm": 0.06676599383354187,
522
  "learning_rate": 2.323671497584541e-05,
523
- "loss": 0.0014,
524
  "step": 670
525
  },
526
  {
527
  "epoch": 5.091304347826087,
528
- "grad_norm": 0.03804684802889824,
529
  "learning_rate": 2.2753623188405797e-05,
530
- "loss": 0.0115,
531
  "step": 680
532
  },
533
  {
534
  "epoch": 5.1,
535
- "grad_norm": 0.027047328650951385,
536
  "learning_rate": 2.2270531400966184e-05,
537
  "loss": 0.0012,
538
  "step": 690
539
  },
540
  {
541
  "epoch": 5.1,
542
- "eval_accuracy": 0.6829268292682927,
543
- "eval_loss": 1.3492704629898071,
544
- "eval_runtime": 6.0789,
545
- "eval_samples_per_second": 13.489,
546
- "eval_steps_per_second": 3.455,
547
  "step": 690
548
  },
549
  {
550
  "epoch": 6.008695652173913,
551
- "grad_norm": 0.0289983619004488,
552
  "learning_rate": 2.178743961352657e-05,
553
- "loss": 0.001,
554
  "step": 700
555
  },
556
  {
557
  "epoch": 6.017391304347826,
558
- "grad_norm": 0.015507020987570286,
559
  "learning_rate": 2.1304347826086958e-05,
560
- "loss": 0.0014,
561
  "step": 710
562
  },
563
  {
564
  "epoch": 6.026086956521739,
565
- "grad_norm": 0.026568621397018433,
566
  "learning_rate": 2.0821256038647345e-05,
567
- "loss": 0.0011,
568
  "step": 720
569
  },
570
  {
571
  "epoch": 6.034782608695652,
572
- "grad_norm": 0.029902568086981773,
573
  "learning_rate": 2.0338164251207732e-05,
574
  "loss": 0.0009,
575
  "step": 730
576
  },
577
  {
578
  "epoch": 6.043478260869565,
579
- "grad_norm": 0.01757122576236725,
580
  "learning_rate": 1.985507246376812e-05,
581
  "loss": 0.0008,
582
  "step": 740
583
  },
584
  {
585
  "epoch": 6.052173913043478,
586
- "grad_norm": 0.02430758625268936,
587
  "learning_rate": 1.9371980676328502e-05,
588
- "loss": 0.0041,
589
  "step": 750
590
  },
591
  {
592
  "epoch": 6.060869565217391,
593
- "grad_norm": 0.013940953649580479,
594
  "learning_rate": 1.888888888888889e-05,
595
- "loss": 0.0019,
596
  "step": 760
597
  },
598
  {
599
  "epoch": 6.069565217391304,
600
- "grad_norm": 0.01289789192378521,
601
  "learning_rate": 1.8405797101449276e-05,
602
- "loss": 0.001,
603
  "step": 770
604
  },
605
  {
606
  "epoch": 6.078260869565217,
607
- "grad_norm": 0.012633569538593292,
608
  "learning_rate": 1.7922705314009663e-05,
609
- "loss": 0.0008,
610
  "step": 780
611
  },
612
  {
613
  "epoch": 6.086956521739131,
614
- "grad_norm": 0.02719777263700962,
615
  "learning_rate": 1.7439613526570046e-05,
616
- "loss": 0.0013,
617
  "step": 790
618
  },
619
  {
620
  "epoch": 6.095652173913043,
621
- "grad_norm": 0.01544960681349039,
622
  "learning_rate": 1.6956521739130433e-05,
623
- "loss": 0.0009,
624
  "step": 800
625
  },
626
  {
627
  "epoch": 6.1,
628
- "eval_accuracy": 0.7073170731707317,
629
- "eval_loss": 1.3622010946273804,
630
- "eval_runtime": 6.1913,
631
- "eval_samples_per_second": 13.244,
632
- "eval_steps_per_second": 3.392,
633
  "step": 805
634
  },
635
  {
636
  "epoch": 7.004347826086956,
637
- "grad_norm": 0.014636457897722721,
638
  "learning_rate": 1.6473429951690824e-05,
639
- "loss": 0.0022,
640
  "step": 810
641
  },
642
  {
643
  "epoch": 7.01304347826087,
644
- "grad_norm": 0.011960177682340145,
645
  "learning_rate": 1.599033816425121e-05,
646
  "loss": 0.0009,
647
  "step": 820
648
  },
649
  {
650
  "epoch": 7.021739130434782,
651
- "grad_norm": 0.025420162826776505,
652
  "learning_rate": 1.5507246376811597e-05,
653
  "loss": 0.0008,
654
  "step": 830
655
  },
656
  {
657
  "epoch": 7.030434782608696,
658
- "grad_norm": 0.015812881290912628,
659
  "learning_rate": 1.5024154589371981e-05,
660
- "loss": 0.0009,
661
  "step": 840
662
  },
663
  {
664
  "epoch": 7.039130434782609,
665
- "grad_norm": 0.012759621255099773,
666
  "learning_rate": 1.4541062801932368e-05,
667
  "loss": 0.0008,
668
  "step": 850
669
  },
670
  {
671
  "epoch": 7.047826086956522,
672
- "grad_norm": 0.013558686710894108,
673
  "learning_rate": 1.4057971014492755e-05,
674
- "loss": 0.0008,
675
  "step": 860
676
  },
677
  {
678
  "epoch": 7.056521739130435,
679
- "grad_norm": 0.009738407097756863,
680
  "learning_rate": 1.3574879227053142e-05,
681
- "loss": 0.0008,
682
  "step": 870
683
  },
684
  {
685
  "epoch": 7.065217391304348,
686
- "grad_norm": 0.020090168341994286,
687
  "learning_rate": 1.3091787439613527e-05,
688
- "loss": 0.0008,
689
  "step": 880
690
  },
691
  {
692
  "epoch": 7.073913043478261,
693
- "grad_norm": 0.00872560404241085,
694
  "learning_rate": 1.2608695652173914e-05,
695
- "loss": 0.0008,
696
  "step": 890
697
  },
698
  {
699
  "epoch": 7.082608695652174,
700
- "grad_norm": 0.0459994301199913,
701
  "learning_rate": 1.21256038647343e-05,
702
- "loss": 0.0009,
703
  "step": 900
704
  },
705
  {
706
  "epoch": 7.091304347826087,
707
- "grad_norm": 0.015241548418998718,
708
  "learning_rate": 1.1642512077294687e-05,
709
  "loss": 0.0007,
710
  "step": 910
711
  },
712
  {
713
  "epoch": 7.1,
714
- "grad_norm": 0.019985787570476532,
715
  "learning_rate": 1.1159420289855073e-05,
716
- "loss": 0.0008,
717
  "step": 920
718
  },
719
  {
720
  "epoch": 7.1,
721
- "eval_accuracy": 0.7073170731707317,
722
- "eval_loss": 1.3999933004379272,
723
- "eval_runtime": 6.3918,
724
- "eval_samples_per_second": 12.829,
725
- "eval_steps_per_second": 3.285,
726
  "step": 920
727
  },
728
  {
729
  "epoch": 8.008695652173913,
730
- "grad_norm": 0.009398338384926319,
731
  "learning_rate": 1.067632850241546e-05,
732
- "loss": 0.0007,
733
  "step": 930
734
  },
735
  {
736
  "epoch": 8.017391304347827,
737
- "grad_norm": 0.026491643860936165,
738
  "learning_rate": 1.0193236714975846e-05,
739
  "loss": 0.0007,
740
  "step": 940
741
  },
742
  {
743
  "epoch": 8.02608695652174,
744
- "grad_norm": 0.015245695598423481,
745
  "learning_rate": 9.710144927536233e-06,
746
- "loss": 0.0007,
747
  "step": 950
748
  },
749
  {
750
  "epoch": 8.034782608695652,
751
- "grad_norm": 0.014611267484724522,
752
  "learning_rate": 9.227053140096618e-06,
753
  "loss": 0.0008,
754
  "step": 960
755
  },
756
  {
757
  "epoch": 8.043478260869565,
758
- "grad_norm": 0.023189160972833633,
759
  "learning_rate": 8.743961352657005e-06,
760
- "loss": 0.0008,
761
  "step": 970
762
  },
763
  {
764
  "epoch": 8.052173913043479,
765
- "grad_norm": 0.01567276194691658,
766
  "learning_rate": 8.26086956521739e-06,
767
- "loss": 0.0008,
768
  "step": 980
769
  },
770
  {
771
  "epoch": 8.060869565217391,
772
- "grad_norm": 0.0071034678258001804,
773
  "learning_rate": 7.777777777777777e-06,
774
  "loss": 0.0006,
775
  "step": 990
776
  },
777
  {
778
  "epoch": 8.069565217391304,
779
- "grad_norm": 0.00896136648952961,
780
  "learning_rate": 7.294685990338164e-06,
781
  "loss": 0.0006,
782
  "step": 1000
783
  },
784
  {
785
  "epoch": 8.078260869565218,
786
- "grad_norm": 0.012383806519210339,
787
  "learning_rate": 6.811594202898551e-06,
788
  "loss": 0.0008,
789
  "step": 1010
790
  },
791
  {
792
  "epoch": 8.08695652173913,
793
- "grad_norm": 0.02067715860903263,
794
  "learning_rate": 6.328502415458938e-06,
795
  "loss": 0.0007,
796
  "step": 1020
797
  },
798
  {
799
  "epoch": 8.095652173913043,
800
- "grad_norm": 0.02547420747578144,
801
  "learning_rate": 5.845410628019324e-06,
802
- "loss": 0.0007,
803
  "step": 1030
804
  },
805
  {
806
  "epoch": 8.1,
807
- "eval_accuracy": 0.7073170731707317,
808
- "eval_loss": 1.401489496231079,
809
- "eval_runtime": 6.1399,
810
- "eval_samples_per_second": 13.355,
811
- "eval_steps_per_second": 3.42,
812
  "step": 1035
813
  },
814
  {
815
  "epoch": 9.004347826086956,
816
- "grad_norm": 0.012581247836351395,
817
  "learning_rate": 5.36231884057971e-06,
818
- "loss": 0.0007,
819
  "step": 1040
820
  },
821
  {
822
  "epoch": 9.013043478260869,
823
- "grad_norm": 0.016566790640354156,
824
  "learning_rate": 4.879227053140096e-06,
825
  "loss": 0.0006,
826
  "step": 1050
827
  },
828
  {
829
  "epoch": 9.021739130434783,
830
- "grad_norm": 0.03303120657801628,
831
  "learning_rate": 4.396135265700483e-06,
832
- "loss": 0.0008,
833
  "step": 1060
834
  },
835
  {
836
  "epoch": 9.030434782608696,
837
- "grad_norm": 0.015636784955859184,
838
  "learning_rate": 3.91304347826087e-06,
839
- "loss": 0.0007,
840
  "step": 1070
841
  },
842
  {
843
  "epoch": 9.039130434782608,
844
- "grad_norm": 0.015506476163864136,
845
  "learning_rate": 3.4299516908212565e-06,
846
- "loss": 0.0008,
847
  "step": 1080
848
  },
849
  {
850
  "epoch": 9.047826086956523,
851
- "grad_norm": 0.01221100240945816,
852
  "learning_rate": 2.9468599033816426e-06,
853
- "loss": 0.0007,
854
  "step": 1090
855
  },
856
  {
857
  "epoch": 9.056521739130435,
858
- "grad_norm": 0.011791340075433254,
859
  "learning_rate": 2.463768115942029e-06,
860
- "loss": 0.0007,
861
  "step": 1100
862
  },
863
  {
864
  "epoch": 9.065217391304348,
865
- "grad_norm": 0.025301584973931313,
866
  "learning_rate": 1.9806763285024155e-06,
867
- "loss": 0.0008,
868
  "step": 1110
869
  },
870
  {
871
  "epoch": 9.07391304347826,
872
- "grad_norm": 0.014775657095015049,
873
  "learning_rate": 1.497584541062802e-06,
874
  "loss": 0.0007,
875
  "step": 1120
876
  },
877
  {
878
  "epoch": 9.082608695652175,
879
- "grad_norm": 0.025027761235833168,
880
  "learning_rate": 1.0144927536231885e-06,
881
- "loss": 0.0007,
882
  "step": 1130
883
  },
884
  {
885
  "epoch": 9.091304347826087,
886
- "grad_norm": 0.007070675026625395,
887
  "learning_rate": 5.314009661835749e-07,
888
  "loss": 0.0006,
889
  "step": 1140
890
  },
891
  {
892
  "epoch": 9.1,
893
- "grad_norm": 0.011773703619837761,
894
  "learning_rate": 4.8309178743961356e-08,
895
- "loss": 0.0007,
896
  "step": 1150
897
  },
898
  {
899
  "epoch": 9.1,
900
- "eval_accuracy": 0.7073170731707317,
901
- "eval_loss": 1.4007967710494995,
902
- "eval_runtime": 6.2852,
903
- "eval_samples_per_second": 13.046,
904
- "eval_steps_per_second": 3.341,
905
  "step": 1150
906
  },
907
  {
908
  "epoch": 9.1,
909
  "step": 1150,
910
  "total_flos": 5.732152700888678e+18,
911
- "train_loss": 0.30212519713110575,
912
- "train_runtime": 1217.5297,
913
- "train_samples_per_second": 3.778,
914
- "train_steps_per_second": 0.945
915
  },
916
  {
917
  "epoch": 9.1,
918
- "eval_accuracy": 0.7073170731707317,
919
- "eval_loss": 1.3381569385528564,
920
- "eval_runtime": 5.7056,
921
- "eval_samples_per_second": 14.372,
922
- "eval_steps_per_second": 3.681,
923
  "step": 1150
924
  }
925
  ],
 
1
  {
2
+ "best_global_step": 460,
3
+ "best_metric": 0.6951219512195121,
4
+ "best_model_checkpoint": "./Models/Matcha_clips_224_fintuned_4/checkpoint-460",
5
  "epoch": 9.1,
6
  "eval_steps": 500,
7
  "global_step": 1150,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.008695652173913044,
14
+ "grad_norm": 6.4240241050720215,
15
  "learning_rate": 3.91304347826087e-06,
16
+ "loss": 1.9844,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.017391304347826087,
21
+ "grad_norm": 5.0679426193237305,
22
  "learning_rate": 8.26086956521739e-06,
23
+ "loss": 1.9772,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.02608695652173913,
28
+ "grad_norm": 7.605220794677734,
29
  "learning_rate": 1.2608695652173914e-05,
30
+ "loss": 1.854,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.034782608695652174,
35
+ "grad_norm": 8.32487964630127,
36
  "learning_rate": 1.6956521739130433e-05,
37
+ "loss": 1.9078,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.043478260869565216,
42
+ "grad_norm": 9.476813316345215,
43
  "learning_rate": 2.1304347826086958e-05,
44
+ "loss": 1.6298,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.05217391304347826,
49
+ "grad_norm": 12.970176696777344,
50
  "learning_rate": 2.5652173913043483e-05,
51
+ "loss": 1.791,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.06086956521739131,
56
+ "grad_norm": 12.027438163757324,
57
  "learning_rate": 3e-05,
58
+ "loss": 1.6904,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.06956521739130435,
63
+ "grad_norm": 9.076032638549805,
64
  "learning_rate": 3.4347826086956526e-05,
65
+ "loss": 1.6026,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.0782608695652174,
70
+ "grad_norm": 9.857560157775879,
71
  "learning_rate": 3.869565217391305e-05,
72
+ "loss": 1.8465,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.08695652173913043,
77
+ "grad_norm": 10.760807991027832,
78
  "learning_rate": 4.304347826086957e-05,
79
+ "loss": 1.5482,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.09565217391304348,
84
+ "grad_norm": 13.033352851867676,
85
  "learning_rate": 4.739130434782609e-05,
86
+ "loss": 1.3814,
87
  "step": 110
88
  },
89
  {
90
  "epoch": 0.1,
91
+ "eval_accuracy": 0.4268292682926829,
92
+ "eval_loss": 1.5108782052993774,
93
+ "eval_runtime": 6.2348,
94
+ "eval_samples_per_second": 13.152,
95
+ "eval_steps_per_second": 3.368,
96
  "step": 115
97
  },
98
  {
99
  "epoch": 1.0043478260869565,
100
+ "grad_norm": 10.566363334655762,
101
  "learning_rate": 4.980676328502415e-05,
102
+ "loss": 1.2744,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 1.0130434782608695,
107
+ "grad_norm": 9.955045700073242,
108
  "learning_rate": 4.932367149758454e-05,
109
+ "loss": 1.0512,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0217391304347827,
114
+ "grad_norm": 21.17795181274414,
115
  "learning_rate": 4.884057971014493e-05,
116
+ "loss": 1.0042,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.0304347826086957,
121
+ "grad_norm": 8.381865501403809,
122
  "learning_rate": 4.8357487922705316e-05,
123
+ "loss": 0.7597,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.0391304347826087,
128
+ "grad_norm": 10.130925178527832,
129
  "learning_rate": 4.7874396135265706e-05,
130
+ "loss": 0.9822,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.0478260869565217,
135
+ "grad_norm": 17.177282333374023,
136
  "learning_rate": 4.739130434782609e-05,
137
+ "loss": 0.8898,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.0565217391304347,
142
+ "grad_norm": 20.3695068359375,
143
  "learning_rate": 4.690821256038648e-05,
144
+ "loss": 0.8532,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.065217391304348,
149
+ "grad_norm": 15.664092063903809,
150
  "learning_rate": 4.642512077294686e-05,
151
+ "loss": 0.873,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.0739130434782609,
156
+ "grad_norm": 16.01185417175293,
157
  "learning_rate": 4.594202898550725e-05,
158
+ "loss": 1.0756,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.0826086956521739,
163
+ "grad_norm": 19.479690551757812,
164
  "learning_rate": 4.545893719806764e-05,
165
+ "loss": 1.3473,
166
  "step": 210
167
  },
168
  {
169
  "epoch": 1.0913043478260869,
170
+ "grad_norm": 14.130785942077637,
171
  "learning_rate": 4.497584541062802e-05,
172
+ "loss": 0.7922,
173
  "step": 220
174
  },
175
  {
176
  "epoch": 1.1,
177
+ "grad_norm": 9.247044563293457,
178
  "learning_rate": 4.449275362318841e-05,
179
+ "loss": 0.776,
180
  "step": 230
181
  },
182
  {
183
  "epoch": 1.1,
184
+ "eval_accuracy": 0.573170731707317,
185
+ "eval_loss": 1.0662370920181274,
186
+ "eval_runtime": 6.0024,
187
+ "eval_samples_per_second": 13.661,
188
+ "eval_steps_per_second": 3.499,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 2.008695652173913,
193
+ "grad_norm": 4.517851829528809,
194
  "learning_rate": 4.4009661835748794e-05,
195
+ "loss": 0.2886,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 2.017391304347826,
200
+ "grad_norm": 3.206162452697754,
201
  "learning_rate": 4.352657004830918e-05,
202
+ "loss": 0.4656,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 2.026086956521739,
207
+ "grad_norm": 1.1080957651138306,
208
  "learning_rate": 4.304347826086957e-05,
209
+ "loss": 0.2634,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.034782608695652,
214
+ "grad_norm": 5.800728797912598,
215
  "learning_rate": 4.256038647342995e-05,
216
+ "loss": 0.222,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.0434782608695654,
221
+ "grad_norm": 0.48055946826934814,
222
  "learning_rate": 4.207729468599034e-05,
223
+ "loss": 0.1454,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.0521739130434784,
228
+ "grad_norm": 2.7402403354644775,
229
  "learning_rate": 4.1594202898550726e-05,
230
+ "loss": 0.1433,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.0608695652173914,
235
+ "grad_norm": 8.421381950378418,
236
  "learning_rate": 4.111111111111111e-05,
237
+ "loss": 0.3145,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.0695652173913044,
242
+ "grad_norm": 4.506998538970947,
243
  "learning_rate": 4.06280193236715e-05,
244
+ "loss": 0.2138,
245
  "step": 310
246
  },
247
  {
248
  "epoch": 2.0782608695652174,
249
+ "grad_norm": 15.715832710266113,
250
  "learning_rate": 4.014492753623188e-05,
251
+ "loss": 0.2296,
252
  "step": 320
253
  },
254
  {
255
  "epoch": 2.0869565217391304,
256
+ "grad_norm": 1.7134050130844116,
257
  "learning_rate": 3.966183574879227e-05,
258
+ "loss": 0.0711,
259
  "step": 330
260
  },
261
  {
262
  "epoch": 2.0956521739130434,
263
+ "grad_norm": 3.08317232131958,
264
  "learning_rate": 3.9178743961352657e-05,
265
+ "loss": 0.3286,
266
  "step": 340
267
  },
268
  {
269
  "epoch": 2.1,
270
+ "eval_accuracy": 0.6463414634146342,
271
+ "eval_loss": 1.0357838869094849,
272
+ "eval_runtime": 6.0464,
273
+ "eval_samples_per_second": 13.562,
274
+ "eval_steps_per_second": 3.473,
275
  "step": 345
276
  },
277
  {
278
  "epoch": 3.0043478260869567,
279
+ "grad_norm": 0.37449759244918823,
280
  "learning_rate": 3.869565217391305e-05,
281
+ "loss": 0.1835,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 3.0130434782608697,
286
+ "grad_norm": 0.18086479604244232,
287
  "learning_rate": 3.821256038647344e-05,
288
+ "loss": 0.0584,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 3.0217391304347827,
293
+ "grad_norm": 0.32367080450057983,
294
  "learning_rate": 3.772946859903382e-05,
295
+ "loss": 0.061,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 3.0304347826086957,
300
+ "grad_norm": 0.264346718788147,
301
  "learning_rate": 3.7246376811594204e-05,
302
+ "loss": 0.0423,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 3.0391304347826087,
307
+ "grad_norm": 8.53426456451416,
308
  "learning_rate": 3.6763285024154594e-05,
309
+ "loss": 0.0256,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.0478260869565217,
314
+ "grad_norm": 0.227620467543602,
315
  "learning_rate": 3.628019323671498e-05,
316
+ "loss": 0.0892,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.0565217391304347,
321
+ "grad_norm": 0.21275293827056885,
322
  "learning_rate": 3.579710144927537e-05,
323
+ "loss": 0.0628,
324
  "step": 410
325
  },
326
  {
327
  "epoch": 3.0652173913043477,
328
+ "grad_norm": 0.09173234552145004,
329
  "learning_rate": 3.531400966183575e-05,
330
+ "loss": 0.0088,
331
  "step": 420
332
  },
333
  {
334
  "epoch": 3.0739130434782607,
335
+ "grad_norm": 0.3767607510089874,
336
  "learning_rate": 3.4830917874396135e-05,
337
+ "loss": 0.0297,
338
  "step": 430
339
  },
340
  {
341
  "epoch": 3.082608695652174,
342
+ "grad_norm": 0.8464566469192505,
343
  "learning_rate": 3.4347826086956526e-05,
344
+ "loss": 0.0493,
345
  "step": 440
346
  },
347
  {
348
  "epoch": 3.091304347826087,
349
+ "grad_norm": 0.3951728045940399,
350
  "learning_rate": 3.386473429951691e-05,
351
+ "loss": 0.0953,
352
  "step": 450
353
  },
354
  {
355
  "epoch": 3.1,
356
+ "grad_norm": 1.0671485662460327,
357
  "learning_rate": 3.338164251207729e-05,
358
+ "loss": 0.1288,
359
  "step": 460
360
  },
361
  {
362
  "epoch": 3.1,
363
+ "eval_accuracy": 0.6951219512195121,
364
+ "eval_loss": 1.2568752765655518,
365
+ "eval_runtime": 6.1334,
366
+ "eval_samples_per_second": 13.369,
367
+ "eval_steps_per_second": 3.424,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 4.008695652173913,
372
+ "grad_norm": 3.649686813354492,
373
  "learning_rate": 3.289855072463768e-05,
374
+ "loss": 0.0122,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 4.017391304347826,
379
+ "grad_norm": 1.168199062347412,
380
  "learning_rate": 3.2415458937198066e-05,
381
+ "loss": 0.0274,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 4.026086956521739,
386
+ "grad_norm": 0.3025151789188385,
387
  "learning_rate": 3.1932367149758457e-05,
388
+ "loss": 0.0088,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 4.034782608695652,
393
+ "grad_norm": 0.033977147191762924,
394
  "learning_rate": 3.144927536231884e-05,
395
+ "loss": 0.0049,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 4.043478260869565,
400
+ "grad_norm": 0.22555121779441833,
401
  "learning_rate": 3.0966183574879224e-05,
402
+ "loss": 0.0073,
403
  "step": 510
404
  },
405
  {
406
  "epoch": 4.052173913043478,
407
+ "grad_norm": 1.255247712135315,
408
  "learning_rate": 3.0483091787439617e-05,
409
+ "loss": 0.0074,
410
  "step": 520
411
  },
412
  {
413
  "epoch": 4.060869565217391,
414
+ "grad_norm": 0.27926549315452576,
415
  "learning_rate": 3e-05,
416
+ "loss": 0.0117,
417
  "step": 530
418
  },
419
  {
420
  "epoch": 4.069565217391304,
421
+ "grad_norm": 0.019165927544236183,
422
  "learning_rate": 2.951690821256039e-05,
423
+ "loss": 0.002,
424
  "step": 540
425
  },
426
  {
427
  "epoch": 4.078260869565217,
428
+ "grad_norm": 0.042612988501787186,
429
  "learning_rate": 2.9033816425120775e-05,
430
+ "loss": 0.0192,
431
  "step": 550
432
  },
433
  {
434
  "epoch": 4.086956521739131,
435
+ "grad_norm": 0.05526461824774742,
436
  "learning_rate": 2.8550724637681158e-05,
437
+ "loss": 0.0021,
438
  "step": 560
439
  },
440
  {
441
  "epoch": 4.095652173913043,
442
+ "grad_norm": 0.03402889147400856,
443
  "learning_rate": 2.806763285024155e-05,
444
+ "loss": 0.0023,
445
  "step": 570
446
  },
447
  {
448
  "epoch": 4.1,
449
+ "eval_accuracy": 0.6951219512195121,
450
+ "eval_loss": 1.3209176063537598,
451
+ "eval_runtime": 6.1384,
452
+ "eval_samples_per_second": 13.359,
453
+ "eval_steps_per_second": 3.421,
454
  "step": 575
455
  },
456
  {
457
  "epoch": 5.004347826086956,
458
+ "grad_norm": 0.023641662672162056,
459
  "learning_rate": 2.7584541062801932e-05,
460
+ "loss": 0.0017,
461
  "step": 580
462
  },
463
  {
464
  "epoch": 5.01304347826087,
465
+ "grad_norm": 0.029742104932665825,
466
  "learning_rate": 2.7101449275362322e-05,
467
+ "loss": 0.0014,
468
  "step": 590
469
  },
470
  {
471
  "epoch": 5.021739130434782,
472
+ "grad_norm": 0.1183994710445404,
473
  "learning_rate": 2.6618357487922706e-05,
474
+ "loss": 0.0016,
475
  "step": 600
476
  },
477
  {
478
  "epoch": 5.030434782608696,
479
+ "grad_norm": 0.033052194863557816,
480
  "learning_rate": 2.6135265700483093e-05,
481
+ "loss": 0.001,
482
  "step": 610
483
  },
484
  {
485
  "epoch": 5.039130434782609,
486
+ "grad_norm": 0.016786742955446243,
487
  "learning_rate": 2.5652173913043483e-05,
488
+ "loss": 0.0023,
489
  "step": 620
490
  },
491
  {
492
  "epoch": 5.047826086956522,
493
+ "grad_norm": 0.020967165008187294,
494
  "learning_rate": 2.5169082125603866e-05,
495
+ "loss": 0.0037,
496
  "step": 630
497
  },
498
  {
499
  "epoch": 5.056521739130435,
500
+ "grad_norm": 0.07056716829538345,
501
  "learning_rate": 2.4685990338164253e-05,
502
+ "loss": 0.004,
503
  "step": 640
504
  },
505
  {
506
  "epoch": 5.065217391304348,
507
+ "grad_norm": 0.993996798992157,
508
  "learning_rate": 2.420289855072464e-05,
509
+ "loss": 0.0022,
510
  "step": 650
511
  },
512
  {
513
  "epoch": 5.073913043478261,
514
+ "grad_norm": 0.028120990842580795,
515
  "learning_rate": 2.3719806763285024e-05,
516
+ "loss": 0.001,
517
  "step": 660
518
  },
519
  {
520
  "epoch": 5.082608695652174,
521
+ "grad_norm": 0.025797342881560326,
522
  "learning_rate": 2.323671497584541e-05,
523
+ "loss": 0.0013,
524
  "step": 670
525
  },
526
  {
527
  "epoch": 5.091304347826087,
528
+ "grad_norm": 0.054205186665058136,
529
  "learning_rate": 2.2753623188405797e-05,
530
+ "loss": 0.0045,
531
  "step": 680
532
  },
533
  {
534
  "epoch": 5.1,
535
+ "grad_norm": 0.02057654596865177,
536
  "learning_rate": 2.2270531400966184e-05,
537
  "loss": 0.0012,
538
  "step": 690
539
  },
540
  {
541
  "epoch": 5.1,
542
+ "eval_accuracy": 0.6707317073170732,
543
+ "eval_loss": 1.2574602365493774,
544
+ "eval_runtime": 6.097,
545
+ "eval_samples_per_second": 13.449,
546
+ "eval_steps_per_second": 3.444,
547
  "step": 690
548
  },
549
  {
550
  "epoch": 6.008695652173913,
551
+ "grad_norm": 0.028874915093183517,
552
  "learning_rate": 2.178743961352657e-05,
553
+ "loss": 0.0009,
554
  "step": 700
555
  },
556
  {
557
  "epoch": 6.017391304347826,
558
+ "grad_norm": 0.01535830833017826,
559
  "learning_rate": 2.1304347826086958e-05,
560
+ "loss": 0.0011,
561
  "step": 710
562
  },
563
  {
564
  "epoch": 6.026086956521739,
565
+ "grad_norm": 0.020613593980669975,
566
  "learning_rate": 2.0821256038647345e-05,
567
+ "loss": 0.0028,
568
  "step": 720
569
  },
570
  {
571
  "epoch": 6.034782608695652,
572
+ "grad_norm": 0.031190281733870506,
573
  "learning_rate": 2.0338164251207732e-05,
574
  "loss": 0.0009,
575
  "step": 730
576
  },
577
  {
578
  "epoch": 6.043478260869565,
579
+ "grad_norm": 0.015504710376262665,
580
  "learning_rate": 1.985507246376812e-05,
581
  "loss": 0.0008,
582
  "step": 740
583
  },
584
  {
585
  "epoch": 6.052173913043478,
586
+ "grad_norm": 0.015204375609755516,
587
  "learning_rate": 1.9371980676328502e-05,
588
+ "loss": 0.0018,
589
  "step": 750
590
  },
591
  {
592
  "epoch": 6.060869565217391,
593
+ "grad_norm": 0.010091892443597317,
594
  "learning_rate": 1.888888888888889e-05,
595
+ "loss": 0.0011,
596
  "step": 760
597
  },
598
  {
599
  "epoch": 6.069565217391304,
600
+ "grad_norm": 0.027177872136235237,
601
  "learning_rate": 1.8405797101449276e-05,
602
+ "loss": 0.0009,
603
  "step": 770
604
  },
605
  {
606
  "epoch": 6.078260869565217,
607
+ "grad_norm": 0.011965460143983364,
608
  "learning_rate": 1.7922705314009663e-05,
609
+ "loss": 0.0007,
610
  "step": 780
611
  },
612
  {
613
  "epoch": 6.086956521739131,
614
+ "grad_norm": 0.014813700690865517,
615
  "learning_rate": 1.7439613526570046e-05,
616
+ "loss": 0.001,
617
  "step": 790
618
  },
619
  {
620
  "epoch": 6.095652173913043,
621
+ "grad_norm": 0.016053922474384308,
622
  "learning_rate": 1.6956521739130433e-05,
623
+ "loss": 0.0008,
624
  "step": 800
625
  },
626
  {
627
  "epoch": 6.1,
628
+ "eval_accuracy": 0.6829268292682927,
629
+ "eval_loss": 1.2972846031188965,
630
+ "eval_runtime": 6.1036,
631
+ "eval_samples_per_second": 13.435,
632
+ "eval_steps_per_second": 3.441,
633
  "step": 805
634
  },
635
  {
636
  "epoch": 7.004347826086956,
637
+ "grad_norm": 0.020354464650154114,
638
  "learning_rate": 1.6473429951690824e-05,
639
+ "loss": 0.0014,
640
  "step": 810
641
  },
642
  {
643
  "epoch": 7.01304347826087,
644
+ "grad_norm": 0.013575357384979725,
645
  "learning_rate": 1.599033816425121e-05,
646
  "loss": 0.0009,
647
  "step": 820
648
  },
649
  {
650
  "epoch": 7.021739130434782,
651
+ "grad_norm": 0.022664641961455345,
652
  "learning_rate": 1.5507246376811597e-05,
653
  "loss": 0.0008,
654
  "step": 830
655
  },
656
  {
657
  "epoch": 7.030434782608696,
658
+ "grad_norm": 0.026450317353010178,
659
  "learning_rate": 1.5024154589371981e-05,
660
+ "loss": 0.0007,
661
  "step": 840
662
  },
663
  {
664
  "epoch": 7.039130434782609,
665
+ "grad_norm": 0.013138238340616226,
666
  "learning_rate": 1.4541062801932368e-05,
667
  "loss": 0.0008,
668
  "step": 850
669
  },
670
  {
671
  "epoch": 7.047826086956522,
672
+ "grad_norm": 0.02052425779402256,
673
  "learning_rate": 1.4057971014492755e-05,
674
+ "loss": 0.0007,
675
  "step": 860
676
  },
677
  {
678
  "epoch": 7.056521739130435,
679
+ "grad_norm": 0.012927724979817867,
680
  "learning_rate": 1.3574879227053142e-05,
681
+ "loss": 0.0007,
682
  "step": 870
683
  },
684
  {
685
  "epoch": 7.065217391304348,
686
+ "grad_norm": 0.014560637064278126,
687
  "learning_rate": 1.3091787439613527e-05,
688
+ "loss": 0.0007,
689
  "step": 880
690
  },
691
  {
692
  "epoch": 7.073913043478261,
693
+ "grad_norm": 0.008629231713712215,
694
  "learning_rate": 1.2608695652173914e-05,
695
+ "loss": 0.0007,
696
  "step": 890
697
  },
698
  {
699
  "epoch": 7.082608695652174,
700
+ "grad_norm": 0.0358402356505394,
701
  "learning_rate": 1.21256038647343e-05,
702
+ "loss": 0.0008,
703
  "step": 900
704
  },
705
  {
706
  "epoch": 7.091304347826087,
707
+ "grad_norm": 0.014950945042073727,
708
  "learning_rate": 1.1642512077294687e-05,
709
  "loss": 0.0007,
710
  "step": 910
711
  },
712
  {
713
  "epoch": 7.1,
714
+ "grad_norm": 0.01364809088408947,
715
  "learning_rate": 1.1159420289855073e-05,
716
+ "loss": 0.0007,
717
  "step": 920
718
  },
719
  {
720
  "epoch": 7.1,
721
+ "eval_accuracy": 0.6829268292682927,
722
+ "eval_loss": 1.3180800676345825,
723
+ "eval_runtime": 6.3025,
724
+ "eval_samples_per_second": 13.011,
725
+ "eval_steps_per_second": 3.332,
726
  "step": 920
727
  },
728
  {
729
  "epoch": 8.008695652173913,
730
+ "grad_norm": 0.008946227841079235,
731
  "learning_rate": 1.067632850241546e-05,
732
+ "loss": 0.0006,
733
  "step": 930
734
  },
735
  {
736
  "epoch": 8.017391304347827,
737
+ "grad_norm": 0.024497035890817642,
738
  "learning_rate": 1.0193236714975846e-05,
739
  "loss": 0.0007,
740
  "step": 940
741
  },
742
  {
743
  "epoch": 8.02608695652174,
744
+ "grad_norm": 0.01063747052103281,
745
  "learning_rate": 9.710144927536233e-06,
746
+ "loss": 0.0006,
747
  "step": 950
748
  },
749
  {
750
  "epoch": 8.034782608695652,
751
+ "grad_norm": 0.014025327749550343,
752
  "learning_rate": 9.227053140096618e-06,
753
  "loss": 0.0008,
754
  "step": 960
755
  },
756
  {
757
  "epoch": 8.043478260869565,
758
+ "grad_norm": 0.021205030381679535,
759
  "learning_rate": 8.743961352657005e-06,
760
+ "loss": 0.0007,
761
  "step": 970
762
  },
763
  {
764
  "epoch": 8.052173913043479,
765
+ "grad_norm": 0.009876050055027008,
766
  "learning_rate": 8.26086956521739e-06,
767
+ "loss": 0.0007,
768
  "step": 980
769
  },
770
  {
771
  "epoch": 8.060869565217391,
772
+ "grad_norm": 0.007320360280573368,
773
  "learning_rate": 7.777777777777777e-06,
774
  "loss": 0.0006,
775
  "step": 990
776
  },
777
  {
778
  "epoch": 8.069565217391304,
779
+ "grad_norm": 0.007261498365551233,
780
  "learning_rate": 7.294685990338164e-06,
781
  "loss": 0.0006,
782
  "step": 1000
783
  },
784
  {
785
  "epoch": 8.078260869565218,
786
+ "grad_norm": 0.013149461708962917,
787
  "learning_rate": 6.811594202898551e-06,
788
  "loss": 0.0008,
789
  "step": 1010
790
  },
791
  {
792
  "epoch": 8.08695652173913,
793
+ "grad_norm": 0.013871278613805771,
794
  "learning_rate": 6.328502415458938e-06,
795
  "loss": 0.0007,
796
  "step": 1020
797
  },
798
  {
799
  "epoch": 8.095652173913043,
800
+ "grad_norm": 0.018679574131965637,
801
  "learning_rate": 5.845410628019324e-06,
802
+ "loss": 0.0006,
803
  "step": 1030
804
  },
805
  {
806
  "epoch": 8.1,
807
+ "eval_accuracy": 0.6829268292682927,
808
+ "eval_loss": 1.329393982887268,
809
+ "eval_runtime": 6.1832,
810
+ "eval_samples_per_second": 13.262,
811
+ "eval_steps_per_second": 3.396,
812
  "step": 1035
813
  },
814
  {
815
  "epoch": 9.004347826086956,
816
+ "grad_norm": 0.011226188391447067,
817
  "learning_rate": 5.36231884057971e-06,
818
+ "loss": 0.0006,
819
  "step": 1040
820
  },
821
  {
822
  "epoch": 9.013043478260869,
823
+ "grad_norm": 0.017457231879234314,
824
  "learning_rate": 4.879227053140096e-06,
825
  "loss": 0.0006,
826
  "step": 1050
827
  },
828
  {
829
  "epoch": 9.021739130434783,
830
+ "grad_norm": 0.023909136652946472,
831
  "learning_rate": 4.396135265700483e-06,
832
+ "loss": 0.0007,
833
  "step": 1060
834
  },
835
  {
836
  "epoch": 9.030434782608696,
837
+ "grad_norm": 0.00866635050624609,
838
  "learning_rate": 3.91304347826087e-06,
839
+ "loss": 0.0006,
840
  "step": 1070
841
  },
842
  {
843
  "epoch": 9.039130434782608,
844
+ "grad_norm": 0.013021158054471016,
845
  "learning_rate": 3.4299516908212565e-06,
846
+ "loss": 0.0007,
847
  "step": 1080
848
  },
849
  {
850
  "epoch": 9.047826086956523,
851
+ "grad_norm": 0.008441662415862083,
852
  "learning_rate": 2.9468599033816426e-06,
853
+ "loss": 0.0006,
854
  "step": 1090
855
  },
856
  {
857
  "epoch": 9.056521739130435,
858
+ "grad_norm": 0.012296984903514385,
859
  "learning_rate": 2.463768115942029e-06,
860
+ "loss": 0.0006,
861
  "step": 1100
862
  },
863
  {
864
  "epoch": 9.065217391304348,
865
+ "grad_norm": 0.025126850232481956,
866
  "learning_rate": 1.9806763285024155e-06,
867
+ "loss": 0.0007,
868
  "step": 1110
869
  },
870
  {
871
  "epoch": 9.07391304347826,
872
+ "grad_norm": 0.012579759582877159,
873
  "learning_rate": 1.497584541062802e-06,
874
  "loss": 0.0007,
875
  "step": 1120
876
  },
877
  {
878
  "epoch": 9.082608695652175,
879
+ "grad_norm": 0.012673572637140751,
880
  "learning_rate": 1.0144927536231885e-06,
881
+ "loss": 0.0006,
882
  "step": 1130
883
  },
884
  {
885
  "epoch": 9.091304347826087,
886
+ "grad_norm": 0.007186697795987129,
887
  "learning_rate": 5.314009661835749e-07,
888
  "loss": 0.0006,
889
  "step": 1140
890
  },
891
  {
892
  "epoch": 9.1,
893
+ "grad_norm": 0.013606193475425243,
894
  "learning_rate": 4.8309178743961356e-08,
895
+ "loss": 0.0006,
896
  "step": 1150
897
  },
898
  {
899
  "epoch": 9.1,
900
+ "eval_accuracy": 0.6951219512195121,
901
+ "eval_loss": 1.332796573638916,
902
+ "eval_runtime": 6.2824,
903
+ "eval_samples_per_second": 13.052,
904
+ "eval_steps_per_second": 3.343,
905
  "step": 1150
906
  },
907
  {
908
  "epoch": 9.1,
909
  "step": 1150,
910
  "total_flos": 5.732152700888678e+18,
911
+ "train_loss": 0.30071158163492445,
912
+ "train_runtime": 1642.1131,
913
+ "train_samples_per_second": 2.801,
914
+ "train_steps_per_second": 0.7
915
  },
916
  {
917
  "epoch": 9.1,
918
+ "eval_accuracy": 0.6951219512195121,
919
+ "eval_loss": 1.2568752765655518,
920
+ "eval_runtime": 5.6457,
921
+ "eval_samples_per_second": 14.524,
922
+ "eval_steps_per_second": 3.72,
923
  "step": 1150
924
  }
925
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcd726e58dbf901b06a4f42284e673960d4b6d9185a8974cbd9733ea2202cde
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9e0e9937d4d0c4c0ef16698f0cc325dc3c27e015ba4375f247d8997c61dfd6
3
  size 5368