c14kevincardenas commited on
Commit
fe77a92
·
verified ·
1 Parent(s): 855fc23

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,11 @@
1
  ---
2
  library_name: transformers
 
 
3
  tags:
 
 
 
4
  - generated_from_trainer
5
  model-index:
6
  - name: target_hold
@@ -12,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
12
 
13
  # target_hold
14
 
15
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
  - Loss: 0.8720
18
  - Iou: 0.0008
 
1
  ---
2
  library_name: transformers
3
+ license: apache-2.0
4
+ base_model: facebook/detr-resnet-50
5
  tags:
6
+ - image-regression
7
+ - human-movement
8
+ - vision
9
  - generated_from_trainer
10
  model-index:
11
  - name: target_hold
 
17
 
18
  # target_hold
19
 
20
+ This model is a fine-tuned version of [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50) on the c14kevincardenas/beta_caller_284_target_hold dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.8720
23
  - Iou: 0.0008
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_IoU": 0.0,
4
- "eval_loss": 1.3494025468826294,
5
- "eval_runtime": 10.1301,
6
- "eval_samples_per_second": 110.858,
7
- "eval_steps_per_second": 1.777,
8
  "total_flos": 0.0,
9
- "train_loss": 1.3746022605895996,
10
- "train_runtime": 3619.0694,
11
- "train_samples_per_second": 35.142,
12
- "train_steps_per_second": 0.553
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_IoU": 0.0008468033398795426,
4
+ "eval_loss": 0.871971845626831,
5
+ "eval_runtime": 15.8906,
6
+ "eval_samples_per_second": 70.671,
7
+ "eval_steps_per_second": 1.133,
8
  "total_flos": 0.0,
9
+ "train_loss": 0.9022787961959838,
10
+ "train_runtime": 6945.9457,
11
+ "train_samples_per_second": 18.31,
12
+ "train_steps_per_second": 0.288
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_IoU": 0.0,
4
- "eval_loss": 1.3494025468826294,
5
- "eval_runtime": 10.1301,
6
- "eval_samples_per_second": 110.858,
7
- "eval_steps_per_second": 1.777
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_IoU": 0.0008468033398795426,
4
+ "eval_loss": 0.871971845626831,
5
+ "eval_runtime": 15.8906,
6
+ "eval_samples_per_second": 70.671,
7
+ "eval_steps_per_second": 1.133
8
  }
runs/Oct25_18-04-31_galactica.ad.cirange.net/events.out.tfevents.1729886459.galactica.ad.cirange.net.184438.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6680e58d5b569fa6fc518905ce2f201bb5bea5ca4b335027e1eac3782c745948
3
+ size 406
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
- "train_loss": 1.3746022605895996,
5
- "train_runtime": 3619.0694,
6
- "train_samples_per_second": 35.142,
7
- "train_steps_per_second": 0.553
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.9022787961959838,
5
+ "train_runtime": 6945.9457,
6
+ "train_samples_per_second": 18.31,
7
+ "train_steps_per_second": 0.288
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.3494025468826294,
3
  "best_model_checkpoint": "/mnt/ml_drive/kcardenas/target_hold/checkpoint-2000",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
@@ -10,752 +10,752 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.25,
13
- "grad_norm": 771.6892700195312,
14
  "learning_rate": 5e-06,
15
- "loss": 1.4609,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5,
20
- "grad_norm": 742042.25,
21
  "learning_rate": 1e-05,
22
- "loss": 1.4537,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.75,
27
- "grad_norm": 778067.125,
28
  "learning_rate": 1.5e-05,
29
- "loss": 1.4627,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
- "grad_norm": 913205.0625,
35
  "learning_rate": 2e-05,
36
- "loss": 1.4703,
37
  "step": 100
38
  },
39
  {
40
  "epoch": 1.0,
41
- "eval_IoU": 0.0,
42
- "eval_loss": 1.4399977922439575,
43
- "eval_runtime": 10.5794,
44
- "eval_samples_per_second": 106.15,
45
- "eval_steps_per_second": 1.701,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.25,
50
- "grad_norm": 805371.25,
51
  "learning_rate": 2.5e-05,
52
- "loss": 1.4476,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.5,
57
- "grad_norm": 179319.0625,
58
  "learning_rate": 3e-05,
59
- "loss": 1.4673,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.75,
64
- "grad_norm": 132580.140625,
65
  "learning_rate": 3.5e-05,
66
- "loss": 1.4393,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
- "grad_norm": 4426164.5,
72
  "learning_rate": 4e-05,
73
- "loss": 1.4686,
74
  "step": 200
75
  },
76
  {
77
  "epoch": 2.0,
78
- "eval_IoU": 0.0,
79
- "eval_loss": 1.4274886846542358,
80
- "eval_runtime": 10.333,
81
- "eval_samples_per_second": 108.681,
82
- "eval_steps_per_second": 1.742,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.25,
87
- "grad_norm": 3633516.5,
88
  "learning_rate": 4.5e-05,
89
- "loss": 1.4373,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.5,
94
- "grad_norm": 602330.6875,
95
  "learning_rate": 5e-05,
96
- "loss": 1.4544,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 2.75,
101
- "grad_norm": 3835361.5,
102
  "learning_rate": 4.928571428571429e-05,
103
- "loss": 1.4161,
104
  "step": 275
105
  },
106
  {
107
  "epoch": 3.0,
108
- "grad_norm": 4627148.5,
109
  "learning_rate": 4.8571428571428576e-05,
110
- "loss": 1.463,
111
  "step": 300
112
  },
113
  {
114
  "epoch": 3.0,
115
- "eval_IoU": 0.0,
116
- "eval_loss": 1.5115046501159668,
117
- "eval_runtime": 10.026,
118
- "eval_samples_per_second": 112.009,
119
- "eval_steps_per_second": 1.795,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.25,
124
- "grad_norm": 5056040.5,
125
  "learning_rate": 4.785714285714286e-05,
126
- "loss": 1.4849,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.5,
131
- "grad_norm": 1441977.25,
132
  "learning_rate": 4.714285714285714e-05,
133
- "loss": 1.4628,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 3.75,
138
- "grad_norm": 123561.8515625,
139
  "learning_rate": 4.642857142857143e-05,
140
- "loss": 1.4627,
141
  "step": 375
142
  },
143
  {
144
  "epoch": 4.0,
145
- "grad_norm": 1065073.75,
146
  "learning_rate": 4.5714285714285716e-05,
147
- "loss": 1.4399,
148
  "step": 400
149
  },
150
  {
151
  "epoch": 4.0,
152
- "eval_IoU": 0.0,
153
- "eval_loss": 1.4285613298416138,
154
- "eval_runtime": 10.0206,
155
- "eval_samples_per_second": 112.069,
156
- "eval_steps_per_second": 1.796,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.25,
161
- "grad_norm": 1083903.625,
162
  "learning_rate": 4.5e-05,
163
- "loss": 1.4181,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 4.5,
168
- "grad_norm": 989387.875,
169
  "learning_rate": 4.428571428571428e-05,
170
- "loss": 1.3905,
171
  "step": 450
172
  },
173
  {
174
  "epoch": 4.75,
175
- "grad_norm": 504863.90625,
176
  "learning_rate": 4.3571428571428576e-05,
177
- "loss": 1.4001,
178
  "step": 475
179
  },
180
  {
181
  "epoch": 5.0,
182
- "grad_norm": 1111330.75,
183
  "learning_rate": 4.2857142857142856e-05,
184
- "loss": 1.4252,
185
  "step": 500
186
  },
187
  {
188
  "epoch": 5.0,
189
- "eval_IoU": 0.0,
190
- "eval_loss": 1.4052964448928833,
191
- "eval_runtime": 10.1806,
192
- "eval_samples_per_second": 110.307,
193
- "eval_steps_per_second": 1.768,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.25,
198
- "grad_norm": 20536042.0,
199
  "learning_rate": 4.214285714285714e-05,
200
- "loss": 1.4065,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 5.5,
205
- "grad_norm": 1043716.0625,
206
  "learning_rate": 4.1428571428571437e-05,
207
- "loss": 1.4236,
208
  "step": 550
209
  },
210
  {
211
  "epoch": 5.75,
212
- "grad_norm": 6279096.0,
213
  "learning_rate": 4.0714285714285717e-05,
214
- "loss": 1.4061,
215
  "step": 575
216
  },
217
  {
218
  "epoch": 6.0,
219
- "grad_norm": 1782540.375,
220
  "learning_rate": 4e-05,
221
- "loss": 1.3961,
222
  "step": 600
223
  },
224
  {
225
  "epoch": 6.0,
226
- "eval_IoU": 0.0,
227
- "eval_loss": 1.3607368469238281,
228
- "eval_runtime": 10.205,
229
- "eval_samples_per_second": 110.045,
230
- "eval_steps_per_second": 1.764,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 6.25,
235
- "grad_norm": 2240105.75,
236
  "learning_rate": 3.928571428571429e-05,
237
- "loss": 1.45,
238
  "step": 625
239
  },
240
  {
241
  "epoch": 6.5,
242
- "grad_norm": 778888.9375,
243
  "learning_rate": 3.857142857142858e-05,
244
- "loss": 1.4902,
245
  "step": 650
246
  },
247
  {
248
  "epoch": 6.75,
249
- "grad_norm": 871022.9375,
250
  "learning_rate": 3.785714285714286e-05,
251
- "loss": 1.3991,
252
  "step": 675
253
  },
254
  {
255
  "epoch": 7.0,
256
- "grad_norm": 2324823.5,
257
  "learning_rate": 3.7142857142857143e-05,
258
- "loss": 1.3791,
259
  "step": 700
260
  },
261
  {
262
  "epoch": 7.0,
263
- "eval_IoU": 0.0,
264
- "eval_loss": 1.3694654703140259,
265
- "eval_runtime": 9.9921,
266
- "eval_samples_per_second": 112.389,
267
- "eval_steps_per_second": 1.801,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 7.25,
272
- "grad_norm": 842470.1875,
273
  "learning_rate": 3.642857142857143e-05,
274
- "loss": 1.3698,
275
  "step": 725
276
  },
277
  {
278
  "epoch": 7.5,
279
- "grad_norm": 1414357.5,
280
  "learning_rate": 3.571428571428572e-05,
281
- "loss": 1.3673,
282
  "step": 750
283
  },
284
  {
285
  "epoch": 7.75,
286
- "grad_norm": 1465819.625,
287
  "learning_rate": 3.5e-05,
288
- "loss": 1.3714,
289
  "step": 775
290
  },
291
  {
292
  "epoch": 8.0,
293
- "grad_norm": 5227108.0,
294
  "learning_rate": 3.428571428571429e-05,
295
- "loss": 1.3555,
296
  "step": 800
297
  },
298
  {
299
  "epoch": 8.0,
300
- "eval_IoU": 0.0,
301
- "eval_loss": 1.369844675064087,
302
- "eval_runtime": 10.1046,
303
- "eval_samples_per_second": 111.138,
304
- "eval_steps_per_second": 1.781,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 8.25,
309
- "grad_norm": 1675220.125,
310
  "learning_rate": 3.357142857142857e-05,
311
- "loss": 1.4224,
312
  "step": 825
313
  },
314
  {
315
  "epoch": 8.5,
316
- "grad_norm": 2374479.0,
317
  "learning_rate": 3.285714285714286e-05,
318
- "loss": 1.3854,
319
  "step": 850
320
  },
321
  {
322
  "epoch": 8.75,
323
- "grad_norm": 741768.375,
324
  "learning_rate": 3.2142857142857144e-05,
325
- "loss": 1.3449,
326
  "step": 875
327
  },
328
  {
329
  "epoch": 9.0,
330
- "grad_norm": 4549860.0,
331
  "learning_rate": 3.142857142857143e-05,
332
- "loss": 1.3402,
333
  "step": 900
334
  },
335
  {
336
  "epoch": 9.0,
337
- "eval_IoU": 0.0,
338
- "eval_loss": 1.3592411279678345,
339
- "eval_runtime": 9.9765,
340
- "eval_samples_per_second": 112.564,
341
- "eval_steps_per_second": 1.804,
342
  "step": 900
343
  },
344
  {
345
  "epoch": 9.25,
346
- "grad_norm": 29592694.0,
347
  "learning_rate": 3.071428571428572e-05,
348
- "loss": 1.3638,
349
  "step": 925
350
  },
351
  {
352
  "epoch": 9.5,
353
- "grad_norm": 1362061.625,
354
  "learning_rate": 3e-05,
355
- "loss": 1.3651,
356
  "step": 950
357
  },
358
  {
359
  "epoch": 9.75,
360
- "grad_norm": 4175936.75,
361
  "learning_rate": 2.9285714285714288e-05,
362
- "loss": 1.3495,
363
  "step": 975
364
  },
365
  {
366
  "epoch": 10.0,
367
- "grad_norm": 1573651.75,
368
  "learning_rate": 2.857142857142857e-05,
369
- "loss": 1.3551,
370
  "step": 1000
371
  },
372
  {
373
  "epoch": 10.0,
374
- "eval_IoU": 0.0,
375
- "eval_loss": 1.378575086593628,
376
- "eval_runtime": 10.1575,
377
- "eval_samples_per_second": 110.559,
378
- "eval_steps_per_second": 1.772,
379
  "step": 1000
380
  },
381
  {
382
  "epoch": 10.25,
383
- "grad_norm": 2796916.25,
384
  "learning_rate": 2.785714285714286e-05,
385
- "loss": 1.3547,
386
  "step": 1025
387
  },
388
  {
389
  "epoch": 10.5,
390
- "grad_norm": 5406704.5,
391
  "learning_rate": 2.714285714285714e-05,
392
- "loss": 1.3367,
393
  "step": 1050
394
  },
395
  {
396
  "epoch": 10.75,
397
- "grad_norm": 1702886.625,
398
  "learning_rate": 2.642857142857143e-05,
399
- "loss": 1.3551,
400
  "step": 1075
401
  },
402
  {
403
  "epoch": 11.0,
404
- "grad_norm": 3208807.0,
405
  "learning_rate": 2.5714285714285714e-05,
406
- "loss": 1.3517,
407
  "step": 1100
408
  },
409
  {
410
  "epoch": 11.0,
411
- "eval_IoU": 0.0,
412
- "eval_loss": 1.3585208654403687,
413
- "eval_runtime": 10.1978,
414
- "eval_samples_per_second": 110.122,
415
- "eval_steps_per_second": 1.765,
416
  "step": 1100
417
  },
418
  {
419
  "epoch": 11.25,
420
- "grad_norm": 676928.4375,
421
  "learning_rate": 2.5e-05,
422
- "loss": 1.3503,
423
  "step": 1125
424
  },
425
  {
426
  "epoch": 11.5,
427
- "grad_norm": 2296206.75,
428
  "learning_rate": 2.4285714285714288e-05,
429
- "loss": 1.344,
430
  "step": 1150
431
  },
432
  {
433
  "epoch": 11.75,
434
- "grad_norm": 21383138.0,
435
  "learning_rate": 2.357142857142857e-05,
436
- "loss": 1.3309,
437
  "step": 1175
438
  },
439
  {
440
  "epoch": 12.0,
441
- "grad_norm": 2441665.5,
442
  "learning_rate": 2.2857142857142858e-05,
443
- "loss": 1.3629,
444
  "step": 1200
445
  },
446
  {
447
  "epoch": 12.0,
448
- "eval_IoU": 0.0,
449
- "eval_loss": 1.3613262176513672,
450
- "eval_runtime": 10.2624,
451
- "eval_samples_per_second": 109.429,
452
- "eval_steps_per_second": 1.754,
453
  "step": 1200
454
  },
455
  {
456
  "epoch": 12.25,
457
- "grad_norm": 4257276.5,
458
  "learning_rate": 2.214285714285714e-05,
459
- "loss": 1.3485,
460
  "step": 1225
461
  },
462
  {
463
  "epoch": 12.5,
464
- "grad_norm": 810549.6875,
465
  "learning_rate": 2.1428571428571428e-05,
466
- "loss": 1.3481,
467
  "step": 1250
468
  },
469
  {
470
  "epoch": 12.75,
471
- "grad_norm": 1533887.125,
472
  "learning_rate": 2.0714285714285718e-05,
473
- "loss": 1.3258,
474
  "step": 1275
475
  },
476
  {
477
  "epoch": 13.0,
478
- "grad_norm": 4748758.0,
479
  "learning_rate": 2e-05,
480
- "loss": 1.3322,
481
  "step": 1300
482
  },
483
  {
484
  "epoch": 13.0,
485
- "eval_IoU": 0.0,
486
- "eval_loss": 1.3555089235305786,
487
- "eval_runtime": 10.3606,
488
- "eval_samples_per_second": 108.391,
489
- "eval_steps_per_second": 1.737,
490
  "step": 1300
491
  },
492
  {
493
  "epoch": 13.25,
494
- "grad_norm": 2167070.25,
495
  "learning_rate": 1.928571428571429e-05,
496
- "loss": 1.3428,
497
  "step": 1325
498
  },
499
  {
500
  "epoch": 13.5,
501
- "grad_norm": 1552955.5,
502
  "learning_rate": 1.8571428571428572e-05,
503
- "loss": 1.3287,
504
  "step": 1350
505
  },
506
  {
507
  "epoch": 13.75,
508
- "grad_norm": 4735588.5,
509
  "learning_rate": 1.785714285714286e-05,
510
- "loss": 1.3376,
511
  "step": 1375
512
  },
513
  {
514
  "epoch": 14.0,
515
- "grad_norm": 6000364.0,
516
  "learning_rate": 1.7142857142857145e-05,
517
- "loss": 1.3185,
518
  "step": 1400
519
  },
520
  {
521
  "epoch": 14.0,
522
- "eval_IoU": 0.0,
523
- "eval_loss": 1.3536880016326904,
524
- "eval_runtime": 10.1961,
525
- "eval_samples_per_second": 110.14,
526
- "eval_steps_per_second": 1.765,
527
  "step": 1400
528
  },
529
  {
530
  "epoch": 14.25,
531
- "grad_norm": 1262304.125,
532
  "learning_rate": 1.642857142857143e-05,
533
- "loss": 1.3381,
534
  "step": 1425
535
  },
536
  {
537
  "epoch": 14.5,
538
- "grad_norm": 923730.6875,
539
  "learning_rate": 1.5714285714285715e-05,
540
- "loss": 1.3302,
541
  "step": 1450
542
  },
543
  {
544
  "epoch": 14.75,
545
- "grad_norm": 654002.0,
546
  "learning_rate": 1.5e-05,
547
- "loss": 1.3242,
548
  "step": 1475
549
  },
550
  {
551
  "epoch": 15.0,
552
- "grad_norm": 1146749.125,
553
  "learning_rate": 1.4285714285714285e-05,
554
- "loss": 1.3254,
555
  "step": 1500
556
  },
557
  {
558
  "epoch": 15.0,
559
- "eval_IoU": 0.0,
560
- "eval_loss": 1.3588523864746094,
561
- "eval_runtime": 10.2312,
562
- "eval_samples_per_second": 109.762,
563
- "eval_steps_per_second": 1.759,
564
  "step": 1500
565
  },
566
  {
567
  "epoch": 15.25,
568
- "grad_norm": 1684872.625,
569
  "learning_rate": 1.357142857142857e-05,
570
- "loss": 1.3347,
571
  "step": 1525
572
  },
573
  {
574
  "epoch": 15.5,
575
- "grad_norm": 1137702.0,
576
  "learning_rate": 1.2857142857142857e-05,
577
- "loss": 1.3213,
578
  "step": 1550
579
  },
580
  {
581
  "epoch": 15.75,
582
- "grad_norm": 1545808.0,
583
  "learning_rate": 1.2142857142857144e-05,
584
- "loss": 1.3084,
585
  "step": 1575
586
  },
587
  {
588
  "epoch": 16.0,
589
- "grad_norm": 3496195.75,
590
  "learning_rate": 1.1428571428571429e-05,
591
- "loss": 1.3343,
592
  "step": 1600
593
  },
594
  {
595
  "epoch": 16.0,
596
- "eval_IoU": 0.0,
597
- "eval_loss": 1.3545335531234741,
598
- "eval_runtime": 10.4337,
599
- "eval_samples_per_second": 107.632,
600
- "eval_steps_per_second": 1.725,
601
  "step": 1600
602
  },
603
  {
604
  "epoch": 16.25,
605
- "grad_norm": 3201461.5,
606
  "learning_rate": 1.0714285714285714e-05,
607
- "loss": 1.3219,
608
  "step": 1625
609
  },
610
  {
611
  "epoch": 16.5,
612
- "grad_norm": 1067260.875,
613
  "learning_rate": 1e-05,
614
- "loss": 1.3258,
615
  "step": 1650
616
  },
617
  {
618
  "epoch": 16.75,
619
- "grad_norm": 2405265.25,
620
  "learning_rate": 9.285714285714286e-06,
621
- "loss": 1.3412,
622
  "step": 1675
623
  },
624
  {
625
  "epoch": 17.0,
626
- "grad_norm": 1497889.125,
627
  "learning_rate": 8.571428571428573e-06,
628
- "loss": 1.3282,
629
  "step": 1700
630
  },
631
  {
632
  "epoch": 17.0,
633
- "eval_IoU": 0.0,
634
- "eval_loss": 1.3675432205200195,
635
- "eval_runtime": 10.0896,
636
- "eval_samples_per_second": 111.302,
637
- "eval_steps_per_second": 1.784,
638
  "step": 1700
639
  },
640
  {
641
  "epoch": 17.25,
642
- "grad_norm": 2305034.25,
643
  "learning_rate": 7.857142857142858e-06,
644
- "loss": 1.3297,
645
  "step": 1725
646
  },
647
  {
648
  "epoch": 17.5,
649
- "grad_norm": 1990793.25,
650
  "learning_rate": 7.142857142857143e-06,
651
- "loss": 1.3318,
652
  "step": 1750
653
  },
654
  {
655
  "epoch": 17.75,
656
- "grad_norm": 832195.9375,
657
  "learning_rate": 6.428571428571429e-06,
658
- "loss": 1.3334,
659
  "step": 1775
660
  },
661
  {
662
  "epoch": 18.0,
663
- "grad_norm": 3059447.75,
664
  "learning_rate": 5.7142857142857145e-06,
665
- "loss": 1.335,
666
  "step": 1800
667
  },
668
  {
669
  "epoch": 18.0,
670
- "eval_IoU": 0.0,
671
- "eval_loss": 1.3514825105667114,
672
- "eval_runtime": 10.1648,
673
- "eval_samples_per_second": 110.48,
674
- "eval_steps_per_second": 1.771,
675
  "step": 1800
676
  },
677
  {
678
  "epoch": 18.25,
679
- "grad_norm": 2467115.5,
680
  "learning_rate": 5e-06,
681
- "loss": 1.3281,
682
  "step": 1825
683
  },
684
  {
685
  "epoch": 18.5,
686
- "grad_norm": 1106492.0,
687
  "learning_rate": 4.285714285714286e-06,
688
- "loss": 1.3091,
689
  "step": 1850
690
  },
691
  {
692
  "epoch": 18.75,
693
- "grad_norm": 2224053.0,
694
  "learning_rate": 3.5714285714285714e-06,
695
- "loss": 1.3299,
696
  "step": 1875
697
  },
698
  {
699
  "epoch": 19.0,
700
- "grad_norm": 2320941.0,
701
  "learning_rate": 2.8571428571428573e-06,
702
- "loss": 1.3259,
703
  "step": 1900
704
  },
705
  {
706
  "epoch": 19.0,
707
- "eval_IoU": 0.0,
708
- "eval_loss": 1.3499101400375366,
709
- "eval_runtime": 9.933,
710
- "eval_samples_per_second": 113.057,
711
- "eval_steps_per_second": 1.812,
712
  "step": 1900
713
  },
714
  {
715
  "epoch": 19.25,
716
- "grad_norm": 1948301.25,
717
  "learning_rate": 2.142857142857143e-06,
718
- "loss": 1.3186,
719
  "step": 1925
720
  },
721
  {
722
  "epoch": 19.5,
723
- "grad_norm": 497170.53125,
724
  "learning_rate": 1.4285714285714286e-06,
725
- "loss": 1.3113,
726
  "step": 1950
727
  },
728
  {
729
  "epoch": 19.75,
730
- "grad_norm": 257136.8125,
731
  "learning_rate": 7.142857142857143e-07,
732
- "loss": 1.3326,
733
  "step": 1975
734
  },
735
  {
736
  "epoch": 20.0,
737
- "grad_norm": 734737.9375,
738
  "learning_rate": 0.0,
739
- "loss": 1.3144,
740
  "step": 2000
741
  },
742
  {
743
  "epoch": 20.0,
744
- "eval_IoU": 0.0,
745
- "eval_loss": 1.3494025468826294,
746
- "eval_runtime": 10.2833,
747
- "eval_samples_per_second": 109.206,
748
- "eval_steps_per_second": 1.75,
749
  "step": 2000
750
  },
751
  {
752
  "epoch": 20.0,
753
  "step": 2000,
754
  "total_flos": 0.0,
755
- "train_loss": 1.3746022605895996,
756
- "train_runtime": 3619.0694,
757
- "train_samples_per_second": 35.142,
758
- "train_steps_per_second": 0.553
759
  }
760
  ],
761
  "logging_steps": 25,
 
1
  {
2
+ "best_metric": 0.871971845626831,
3
  "best_model_checkpoint": "/mnt/ml_drive/kcardenas/target_hold/checkpoint-2000",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.25,
13
+ "grad_norm": 671978.625,
14
  "learning_rate": 5e-06,
15
+ "loss": 1.3666,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5,
20
+ "grad_norm": 686243.6875,
21
  "learning_rate": 1e-05,
22
+ "loss": 1.3413,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.75,
27
+ "grad_norm": 512857.65625,
28
  "learning_rate": 1.5e-05,
29
+ "loss": 1.2866,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "grad_norm": 536989.5625,
35
  "learning_rate": 2e-05,
36
+ "loss": 1.2348,
37
  "step": 100
38
  },
39
  {
40
  "epoch": 1.0,
41
+ "eval_IoU": 6.266252117587144e-05,
42
+ "eval_loss": 1.1666094064712524,
43
+ "eval_runtime": 16.7443,
44
+ "eval_samples_per_second": 67.068,
45
+ "eval_steps_per_second": 1.075,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.25,
50
+ "grad_norm": 261045.0625,
51
  "learning_rate": 2.5e-05,
52
+ "loss": 1.1566,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.5,
57
+ "grad_norm": 215879.234375,
58
  "learning_rate": 3e-05,
59
+ "loss": 1.094,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.75,
64
+ "grad_norm": 149507.1875,
65
  "learning_rate": 3.5e-05,
66
+ "loss": 1.037,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
+ "grad_norm": 179843.1875,
72
  "learning_rate": 4e-05,
73
+ "loss": 1.0043,
74
  "step": 200
75
  },
76
  {
77
  "epoch": 2.0,
78
+ "eval_IoU": 0.002259200195430286,
79
+ "eval_loss": 0.9816299080848694,
80
+ "eval_runtime": 16.1893,
81
+ "eval_samples_per_second": 69.367,
82
+ "eval_steps_per_second": 1.112,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.25,
87
+ "grad_norm": 79894.609375,
88
  "learning_rate": 4.5e-05,
89
+ "loss": 0.9636,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.5,
94
+ "grad_norm": 58901.21484375,
95
  "learning_rate": 5e-05,
96
+ "loss": 0.9395,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 2.75,
101
+ "grad_norm": 46244.05859375,
102
  "learning_rate": 4.928571428571429e-05,
103
+ "loss": 0.9225,
104
  "step": 275
105
  },
106
  {
107
  "epoch": 3.0,
108
+ "grad_norm": 31114.857421875,
109
  "learning_rate": 4.8571428571428576e-05,
110
+ "loss": 0.9101,
111
  "step": 300
112
  },
113
  {
114
  "epoch": 3.0,
115
+ "eval_IoU": 0.002016003165160829,
116
+ "eval_loss": 0.9057585597038269,
117
+ "eval_runtime": 16.1324,
118
+ "eval_samples_per_second": 69.611,
119
+ "eval_steps_per_second": 1.116,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.25,
124
+ "grad_norm": 25909.61328125,
125
  "learning_rate": 4.785714285714286e-05,
126
+ "loss": 0.9011,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.5,
131
+ "grad_norm": 25156.486328125,
132
  "learning_rate": 4.714285714285714e-05,
133
+ "loss": 0.8919,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 3.75,
138
+ "grad_norm": 31189.755859375,
139
  "learning_rate": 4.642857142857143e-05,
140
+ "loss": 0.8876,
141
  "step": 375
142
  },
143
  {
144
  "epoch": 4.0,
145
+ "grad_norm": 23475.142578125,
146
  "learning_rate": 4.5714285714285716e-05,
147
+ "loss": 0.8846,
148
  "step": 400
149
  },
150
  {
151
  "epoch": 4.0,
152
+ "eval_IoU": 0.001328349647772117,
153
+ "eval_loss": 0.8883054256439209,
154
+ "eval_runtime": 16.0469,
155
+ "eval_samples_per_second": 69.982,
156
+ "eval_steps_per_second": 1.122,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.25,
161
+ "grad_norm": 17651.619140625,
162
  "learning_rate": 4.5e-05,
163
+ "loss": 0.8822,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 4.5,
168
+ "grad_norm": 12211.6630859375,
169
  "learning_rate": 4.428571428571428e-05,
170
+ "loss": 0.8777,
171
  "step": 450
172
  },
173
  {
174
  "epoch": 4.75,
175
+ "grad_norm": 28110.140625,
176
  "learning_rate": 4.3571428571428576e-05,
177
+ "loss": 0.8786,
178
  "step": 475
179
  },
180
  {
181
  "epoch": 5.0,
182
+ "grad_norm": 10401.6005859375,
183
  "learning_rate": 4.2857142857142856e-05,
184
+ "loss": 0.8755,
185
  "step": 500
186
  },
187
  {
188
  "epoch": 5.0,
189
+ "eval_IoU": 0.0011379811073136117,
190
+ "eval_loss": 0.8818888664245605,
191
+ "eval_runtime": 15.912,
192
+ "eval_samples_per_second": 70.575,
193
+ "eval_steps_per_second": 1.131,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.25,
198
+ "grad_norm": 14044.361328125,
199
  "learning_rate": 4.214285714285714e-05,
200
+ "loss": 0.8734,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 5.5,
205
+ "grad_norm": 8820.140625,
206
  "learning_rate": 4.1428571428571437e-05,
207
+ "loss": 0.875,
208
  "step": 550
209
  },
210
  {
211
  "epoch": 5.75,
212
+ "grad_norm": 7751.67822265625,
213
  "learning_rate": 4.0714285714285717e-05,
214
+ "loss": 0.8758,
215
  "step": 575
216
  },
217
  {
218
  "epoch": 6.0,
219
+ "grad_norm": 29417.427734375,
220
  "learning_rate": 4e-05,
221
+ "loss": 0.8714,
222
  "step": 600
223
  },
224
  {
225
  "epoch": 6.0,
226
+ "eval_IoU": 0.0010282730649858857,
227
+ "eval_loss": 0.8788518905639648,
228
+ "eval_runtime": 16.1196,
229
+ "eval_samples_per_second": 69.667,
230
+ "eval_steps_per_second": 1.117,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 6.25,
235
+ "grad_norm": 13193.9521484375,
236
  "learning_rate": 3.928571428571429e-05,
237
+ "loss": 0.87,
238
  "step": 625
239
  },
240
  {
241
  "epoch": 6.5,
242
+ "grad_norm": 9639.7734375,
243
  "learning_rate": 3.857142857142858e-05,
244
+ "loss": 0.8709,
245
  "step": 650
246
  },
247
  {
248
  "epoch": 6.75,
249
+ "grad_norm": 42599.96484375,
250
  "learning_rate": 3.785714285714286e-05,
251
+ "loss": 0.8693,
252
  "step": 675
253
  },
254
  {
255
  "epoch": 7.0,
256
+ "grad_norm": 22185.576171875,
257
  "learning_rate": 3.7142857142857143e-05,
258
+ "loss": 0.8684,
259
  "step": 700
260
  },
261
  {
262
  "epoch": 7.0,
263
+ "eval_IoU": 0.0009165315499091501,
264
+ "eval_loss": 0.8773335814476013,
265
+ "eval_runtime": 15.6201,
266
+ "eval_samples_per_second": 71.894,
267
+ "eval_steps_per_second": 1.152,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 7.25,
272
+ "grad_norm": 8237.5849609375,
273
  "learning_rate": 3.642857142857143e-05,
274
+ "loss": 0.8692,
275
  "step": 725
276
  },
277
  {
278
  "epoch": 7.5,
279
+ "grad_norm": 9722.630859375,
280
  "learning_rate": 3.571428571428572e-05,
281
+ "loss": 0.8709,
282
  "step": 750
283
  },
284
  {
285
  "epoch": 7.75,
286
+ "grad_norm": 11062.8916015625,
287
  "learning_rate": 3.5e-05,
288
+ "loss": 0.8652,
289
  "step": 775
290
  },
291
  {
292
  "epoch": 8.0,
293
+ "grad_norm": 34961.71484375,
294
  "learning_rate": 3.428571428571429e-05,
295
+ "loss": 0.8664,
296
  "step": 800
297
  },
298
  {
299
  "epoch": 8.0,
300
+ "eval_IoU": 0.0008482180748956496,
301
+ "eval_loss": 0.8764283061027527,
302
+ "eval_runtime": 15.6883,
303
+ "eval_samples_per_second": 71.582,
304
+ "eval_steps_per_second": 1.147,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 8.25,
309
+ "grad_norm": 11363.548828125,
310
  "learning_rate": 3.357142857142857e-05,
311
+ "loss": 0.8658,
312
  "step": 825
313
  },
314
  {
315
  "epoch": 8.5,
316
+ "grad_norm": 21425.958984375,
317
  "learning_rate": 3.285714285714286e-05,
318
+ "loss": 0.8673,
319
  "step": 850
320
  },
321
  {
322
  "epoch": 8.75,
323
+ "grad_norm": 20737.205078125,
324
  "learning_rate": 3.2142857142857144e-05,
325
+ "loss": 0.8637,
326
  "step": 875
327
  },
328
  {
329
  "epoch": 9.0,
330
+ "grad_norm": 15000.978515625,
331
  "learning_rate": 3.142857142857143e-05,
332
+ "loss": 0.8677,
333
  "step": 900
334
  },
335
  {
336
  "epoch": 9.0,
337
+ "eval_IoU": 0.0008826124813546499,
338
+ "eval_loss": 0.875173807144165,
339
+ "eval_runtime": 15.8392,
340
+ "eval_samples_per_second": 70.9,
341
+ "eval_steps_per_second": 1.136,
342
  "step": 900
343
  },
344
  {
345
  "epoch": 9.25,
346
+ "grad_norm": 20330.591796875,
347
  "learning_rate": 3.071428571428572e-05,
348
+ "loss": 0.8675,
349
  "step": 925
350
  },
351
  {
352
  "epoch": 9.5,
353
+ "grad_norm": 14069.2763671875,
354
  "learning_rate": 3e-05,
355
+ "loss": 0.8662,
356
  "step": 950
357
  },
358
  {
359
  "epoch": 9.75,
360
+ "grad_norm": 18826.400390625,
361
  "learning_rate": 2.9285714285714288e-05,
362
+ "loss": 0.8625,
363
  "step": 975
364
  },
365
  {
366
  "epoch": 10.0,
367
+ "grad_norm": 19264.611328125,
368
  "learning_rate": 2.857142857142857e-05,
369
+ "loss": 0.863,
370
  "step": 1000
371
  },
372
  {
373
  "epoch": 10.0,
374
+ "eval_IoU": 0.0008654725467694338,
375
+ "eval_loss": 0.8746740221977234,
376
+ "eval_runtime": 16.0579,
377
+ "eval_samples_per_second": 69.935,
378
+ "eval_steps_per_second": 1.121,
379
  "step": 1000
380
  },
381
  {
382
  "epoch": 10.25,
383
+ "grad_norm": 8239.904296875,
384
  "learning_rate": 2.785714285714286e-05,
385
+ "loss": 0.8644,
386
  "step": 1025
387
  },
388
  {
389
  "epoch": 10.5,
390
+ "grad_norm": 11806.5556640625,
391
  "learning_rate": 2.714285714285714e-05,
392
+ "loss": 0.8629,
393
  "step": 1050
394
  },
395
  {
396
  "epoch": 10.75,
397
+ "grad_norm": 8895.9296875,
398
  "learning_rate": 2.642857142857143e-05,
399
+ "loss": 0.8665,
400
  "step": 1075
401
  },
402
  {
403
  "epoch": 11.0,
404
+ "grad_norm": 14509.6494140625,
405
  "learning_rate": 2.5714285714285714e-05,
406
+ "loss": 0.8619,
407
  "step": 1100
408
  },
409
  {
410
  "epoch": 11.0,
411
+ "eval_IoU": 0.0008878591602175693,
412
+ "eval_loss": 0.873702883720398,
413
+ "eval_runtime": 15.7003,
414
+ "eval_samples_per_second": 71.527,
415
+ "eval_steps_per_second": 1.146,
416
  "step": 1100
417
  },
418
  {
419
  "epoch": 11.25,
420
+ "grad_norm": 16054.029296875,
421
  "learning_rate": 2.5e-05,
422
+ "loss": 0.8632,
423
  "step": 1125
424
  },
425
  {
426
  "epoch": 11.5,
427
+ "grad_norm": 13585.3857421875,
428
  "learning_rate": 2.4285714285714288e-05,
429
+ "loss": 0.8644,
430
  "step": 1150
431
  },
432
  {
433
  "epoch": 11.75,
434
+ "grad_norm": 13984.9892578125,
435
  "learning_rate": 2.357142857142857e-05,
436
+ "loss": 0.8627,
437
  "step": 1175
438
  },
439
  {
440
  "epoch": 12.0,
441
+ "grad_norm": 10830.5859375,
442
  "learning_rate": 2.2857142857142858e-05,
443
+ "loss": 0.8637,
444
  "step": 1200
445
  },
446
  {
447
  "epoch": 12.0,
448
+ "eval_IoU": 0.0008766921482666689,
449
+ "eval_loss": 0.8732270002365112,
450
+ "eval_runtime": 15.714,
451
+ "eval_samples_per_second": 71.465,
452
+ "eval_steps_per_second": 1.145,
453
  "step": 1200
454
  },
455
  {
456
  "epoch": 12.25,
457
+ "grad_norm": 9003.2802734375,
458
  "learning_rate": 2.214285714285714e-05,
459
+ "loss": 0.8638,
460
  "step": 1225
461
  },
462
  {
463
  "epoch": 12.5,
464
+ "grad_norm": 20906.34375,
465
  "learning_rate": 2.1428571428571428e-05,
466
+ "loss": 0.8607,
467
  "step": 1250
468
  },
469
  {
470
  "epoch": 12.75,
471
+ "grad_norm": 6575.8974609375,
472
  "learning_rate": 2.0714285714285718e-05,
473
+ "loss": 0.8616,
474
  "step": 1275
475
  },
476
  {
477
  "epoch": 13.0,
478
+ "grad_norm": 12234.8251953125,
479
  "learning_rate": 2e-05,
480
+ "loss": 0.8632,
481
  "step": 1300
482
  },
483
  {
484
  "epoch": 13.0,
485
+ "eval_IoU": 0.0008608651426555169,
486
+ "eval_loss": 0.8730344772338867,
487
+ "eval_runtime": 16.3659,
488
+ "eval_samples_per_second": 68.618,
489
+ "eval_steps_per_second": 1.1,
490
  "step": 1300
491
  },
492
  {
493
  "epoch": 13.25,
494
+ "grad_norm": 8454.9755859375,
495
  "learning_rate": 1.928571428571429e-05,
496
+ "loss": 0.864,
497
  "step": 1325
498
  },
499
  {
500
  "epoch": 13.5,
501
+ "grad_norm": 16713.82421875,
502
  "learning_rate": 1.8571428571428572e-05,
503
+ "loss": 0.8623,
504
  "step": 1350
505
  },
506
  {
507
  "epoch": 13.75,
508
+ "grad_norm": 17913.958984375,
509
  "learning_rate": 1.785714285714286e-05,
510
+ "loss": 0.8649,
511
  "step": 1375
512
  },
513
  {
514
  "epoch": 14.0,
515
+ "grad_norm": 33508.1640625,
516
  "learning_rate": 1.7142857142857145e-05,
517
+ "loss": 0.8581,
518
  "step": 1400
519
  },
520
  {
521
  "epoch": 14.0,
522
+ "eval_IoU": 0.0008642116435639467,
523
+ "eval_loss": 0.8726587295532227,
524
+ "eval_runtime": 15.9379,
525
+ "eval_samples_per_second": 70.461,
526
+ "eval_steps_per_second": 1.129,
527
  "step": 1400
528
  },
529
  {
530
  "epoch": 14.25,
531
+ "grad_norm": 10902.5810546875,
532
  "learning_rate": 1.642857142857143e-05,
533
+ "loss": 0.8626,
534
  "step": 1425
535
  },
536
  {
537
  "epoch": 14.5,
538
+ "grad_norm": 5604.90576171875,
539
  "learning_rate": 1.5714285714285715e-05,
540
+ "loss": 0.8581,
541
  "step": 1450
542
  },
543
  {
544
  "epoch": 14.75,
545
+ "grad_norm": 11584.734375,
546
  "learning_rate": 1.5e-05,
547
+ "loss": 0.8644,
548
  "step": 1475
549
  },
550
  {
551
  "epoch": 15.0,
552
+ "grad_norm": 10246.080078125,
553
  "learning_rate": 1.4285714285714285e-05,
554
+ "loss": 0.8615,
555
  "step": 1500
556
  },
557
  {
558
  "epoch": 15.0,
559
+ "eval_IoU": 0.0008597120680148628,
560
+ "eval_loss": 0.8724328279495239,
561
+ "eval_runtime": 15.6896,
562
+ "eval_samples_per_second": 71.576,
563
+ "eval_steps_per_second": 1.147,
564
  "step": 1500
565
  },
566
  {
567
  "epoch": 15.25,
568
+ "grad_norm": 8243.01953125,
569
  "learning_rate": 1.357142857142857e-05,
570
+ "loss": 0.8617,
571
  "step": 1525
572
  },
573
  {
574
  "epoch": 15.5,
575
+ "grad_norm": 9738.013671875,
576
  "learning_rate": 1.2857142857142857e-05,
577
+ "loss": 0.8618,
578
  "step": 1550
579
  },
580
  {
581
  "epoch": 15.75,
582
+ "grad_norm": 10498.345703125,
583
  "learning_rate": 1.2142857142857144e-05,
584
+ "loss": 0.8603,
585
  "step": 1575
586
  },
587
  {
588
  "epoch": 16.0,
589
+ "grad_norm": 31001.509765625,
590
  "learning_rate": 1.1428571428571429e-05,
591
+ "loss": 0.8604,
592
  "step": 1600
593
  },
594
  {
595
  "epoch": 16.0,
596
+ "eval_IoU": 0.0008376356411078918,
597
+ "eval_loss": 0.8724232912063599,
598
+ "eval_runtime": 16.5134,
599
+ "eval_samples_per_second": 68.005,
600
+ "eval_steps_per_second": 1.09,
601
  "step": 1600
602
  },
603
  {
604
  "epoch": 16.25,
605
+ "grad_norm": 23480.4375,
606
  "learning_rate": 1.0714285714285714e-05,
607
+ "loss": 0.8622,
608
  "step": 1625
609
  },
610
  {
611
  "epoch": 16.5,
612
+ "grad_norm": 10336.7646484375,
613
  "learning_rate": 1e-05,
614
+ "loss": 0.8593,
615
  "step": 1650
616
  },
617
  {
618
  "epoch": 16.75,
619
+ "grad_norm": 13781.5810546875,
620
  "learning_rate": 9.285714285714286e-06,
621
+ "loss": 0.8627,
622
  "step": 1675
623
  },
624
  {
625
  "epoch": 17.0,
626
+ "grad_norm": 8223.326171875,
627
  "learning_rate": 8.571428571428573e-06,
628
+ "loss": 0.8606,
629
  "step": 1700
630
  },
631
  {
632
  "epoch": 17.0,
633
+ "eval_IoU": 0.0008638472124057635,
634
+ "eval_loss": 0.8720372319221497,
635
+ "eval_runtime": 16.0284,
636
+ "eval_samples_per_second": 70.063,
637
+ "eval_steps_per_second": 1.123,
638
  "step": 1700
639
  },
640
  {
641
  "epoch": 17.25,
642
+ "grad_norm": 6056.0810546875,
643
  "learning_rate": 7.857142857142858e-06,
644
+ "loss": 0.8616,
645
  "step": 1725
646
  },
647
  {
648
  "epoch": 17.5,
649
+ "grad_norm": 13163.3916015625,
650
  "learning_rate": 7.142857142857143e-06,
651
+ "loss": 0.8626,
652
  "step": 1750
653
  },
654
  {
655
  "epoch": 17.75,
656
+ "grad_norm": 7198.291015625,
657
  "learning_rate": 6.428571428571429e-06,
658
+ "loss": 0.862,
659
  "step": 1775
660
  },
661
  {
662
  "epoch": 18.0,
663
+ "grad_norm": 30992.525390625,
664
  "learning_rate": 5.7142857142857145e-06,
665
+ "loss": 0.8592,
666
  "step": 1800
667
  },
668
  {
669
  "epoch": 18.0,
670
+ "eval_IoU": 0.000850173645477854,
671
+ "eval_loss": 0.8720242381095886,
672
+ "eval_runtime": 15.5216,
673
+ "eval_samples_per_second": 72.351,
674
+ "eval_steps_per_second": 1.16,
675
  "step": 1800
676
  },
677
  {
678
  "epoch": 18.25,
679
+ "grad_norm": 8230.2451171875,
680
  "learning_rate": 5e-06,
681
+ "loss": 0.859,
682
  "step": 1825
683
  },
684
  {
685
  "epoch": 18.5,
686
+ "grad_norm": 9012.4931640625,
687
  "learning_rate": 4.285714285714286e-06,
688
+ "loss": 0.8601,
689
  "step": 1850
690
  },
691
  {
692
  "epoch": 18.75,
693
+ "grad_norm": 12221.1279296875,
694
  "learning_rate": 3.5714285714285714e-06,
695
+ "loss": 0.8608,
696
  "step": 1875
697
  },
698
  {
699
  "epoch": 19.0,
700
+ "grad_norm": 26469.41015625,
701
  "learning_rate": 2.8571428571428573e-06,
702
+ "loss": 0.8621,
703
  "step": 1900
704
  },
705
  {
706
  "epoch": 19.0,
707
+ "eval_IoU": 0.0008449985251271719,
708
+ "eval_loss": 0.8720095753669739,
709
+ "eval_runtime": 15.4454,
710
+ "eval_samples_per_second": 72.708,
711
+ "eval_steps_per_second": 1.165,
712
  "step": 1900
713
  },
714
  {
715
  "epoch": 19.25,
716
+ "grad_norm": 12594.4345703125,
717
  "learning_rate": 2.142857142857143e-06,
718
+ "loss": 0.8623,
719
  "step": 1925
720
  },
721
  {
722
  "epoch": 19.5,
723
+ "grad_norm": 7997.478515625,
724
  "learning_rate": 1.4285714285714286e-06,
725
+ "loss": 0.8597,
726
  "step": 1950
727
  },
728
  {
729
  "epoch": 19.75,
730
+ "grad_norm": 8224.8974609375,
731
  "learning_rate": 7.142857142857143e-07,
732
+ "loss": 0.8605,
733
  "step": 1975
734
  },
735
  {
736
  "epoch": 20.0,
737
+ "grad_norm": 18461.4453125,
738
  "learning_rate": 0.0,
739
+ "loss": 0.8629,
740
  "step": 2000
741
  },
742
  {
743
  "epoch": 20.0,
744
+ "eval_IoU": 0.0008468033398795426,
745
+ "eval_loss": 0.871971845626831,
746
+ "eval_runtime": 15.8784,
747
+ "eval_samples_per_second": 70.725,
748
+ "eval_steps_per_second": 1.134,
749
  "step": 2000
750
  },
751
  {
752
  "epoch": 20.0,
753
  "step": 2000,
754
  "total_flos": 0.0,
755
+ "train_loss": 0.9022787961959838,
756
+ "train_runtime": 6945.9457,
757
+ "train_samples_per_second": 18.31,
758
+ "train_steps_per_second": 0.288
759
  }
760
  ],
761
  "logging_steps": 25,