ChiefTheLord commited on
Commit
cf135e5
verified
1 Parent(s): 5955d4f

Delete flickr8k_checkpoints/checkpoint-1208-4

Browse files
flickr8k_checkpoints/checkpoint-1208-4/adapter.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:036ae2e3f68d8825f647fd122ccafb02672fdd5778c3ae2173a7ebf151133ec0
3
- size 17064932
 
 
 
 
flickr8k_checkpoints/checkpoint-1208-4/eval_state.json DELETED
The diff for this file is too large to render. See raw diff
 
flickr8k_checkpoints/checkpoint-1208-4/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a74396a35b61b52ab12c8535187e9775329dc481983cd70a61f0b20dbfcf419
3
- size 8714492
 
 
 
 
flickr8k_checkpoints/checkpoint-1208-4/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
3
- size 14244
 
 
 
 
flickr8k_checkpoints/checkpoint-1208-4/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:229084bbb0e922d44dba4cc9206f54220a162a8229897c2f3d636ef81c5c5418
3
- size 1064
 
 
 
 
flickr8k_checkpoints/checkpoint-1208-4/trainer_state.json DELETED
@@ -1,596 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
- "eval_steps": 500,
6
- "global_step": 1208,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.026490066225165563,
13
- "grad_norm": 0.1006568893790245,
14
- "learning_rate": 0.0004324324324324325,
15
- "loss": 4.0361,
16
- "step": 16
17
- },
18
- {
19
- "epoch": 0.052980132450331126,
20
- "grad_norm": 0.1183805838227272,
21
- "learning_rate": 0.000864864864864865,
22
- "loss": 3.9854,
23
- "step": 32
24
- },
25
- {
26
- "epoch": 0.07947019867549669,
27
- "grad_norm": 0.11201324313879013,
28
- "learning_rate": 0.0009997822892796068,
29
- "loss": 4.0121,
30
- "step": 48
31
- },
32
- {
33
- "epoch": 0.10596026490066225,
34
- "grad_norm": 0.13807597756385803,
35
- "learning_rate": 0.000998688816161266,
36
- "loss": 4.0147,
37
- "step": 64
38
- },
39
- {
40
- "epoch": 0.13245033112582782,
41
- "grad_norm": 0.1787775456905365,
42
- "learning_rate": 0.0009966766110013582,
43
- "loss": 4.0057,
44
- "step": 80
45
- },
46
- {
47
- "epoch": 0.15894039735099338,
48
- "grad_norm": 0.1131894588470459,
49
- "learning_rate": 0.0009937493808759087,
50
- "loss": 4.0668,
51
- "step": 96
52
- },
53
- {
54
- "epoch": 0.18543046357615894,
55
- "grad_norm": 0.13122417032718658,
56
- "learning_rate": 0.0009899125186070988,
57
- "loss": 4.0053,
58
- "step": 112
59
- },
60
- {
61
- "epoch": 0.2119205298013245,
62
- "grad_norm": 0.1222459226846695,
63
- "learning_rate": 0.0009851730928280944,
64
- "loss": 3.9903,
65
- "step": 128
66
- },
67
- {
68
- "epoch": 0.23841059602649006,
69
- "grad_norm": 0.11710074543952942,
70
- "learning_rate": 0.0009795398349605373,
71
- "loss": 3.9932,
72
- "step": 144
73
- },
74
- {
75
- "epoch": 0.26490066225165565,
76
- "grad_norm": 0.11267052590847015,
77
- "learning_rate": 0.0009730231231286876,
78
- "loss": 3.9942,
79
- "step": 160
80
- },
81
- {
82
- "epoch": 0.2913907284768212,
83
- "grad_norm": 0.11705270409584045,
84
- "learning_rate": 0.0009656349630398554,
85
- "loss": 3.9903,
86
- "step": 176
87
- },
88
- {
89
- "epoch": 0.31788079470198677,
90
- "grad_norm": 0.11069143563508987,
91
- "learning_rate": 0.0009573889658663424,
92
- "loss": 4.0288,
93
- "step": 192
94
- },
95
- {
96
- "epoch": 0.3443708609271523,
97
- "grad_norm": 0.12676870822906494,
98
- "learning_rate": 0.0009483003231696446,
99
- "loss": 3.9981,
100
- "step": 208
101
- },
102
- {
103
- "epoch": 0.3708609271523179,
104
- "grad_norm": 0.1115901842713356,
105
- "learning_rate": 0.0009383857789131097,
106
- "loss": 3.9991,
107
- "step": 224
108
- },
109
- {
110
- "epoch": 0.3973509933774834,
111
- "grad_norm": 0.10467631369829178,
112
- "learning_rate": 0.0009276635986146136,
113
- "loss": 3.9799,
114
- "step": 240
115
- },
116
- {
117
- "epoch": 0.423841059602649,
118
- "grad_norm": 0.09870990365743637,
119
- "learning_rate": 0.0009161535356960828,
120
- "loss": 4.0001,
121
- "step": 256
122
- },
123
- {
124
- "epoch": 0.4503311258278146,
125
- "grad_norm": 0.10558207333087921,
126
- "learning_rate": 0.0009038767950918592,
127
- "loss": 3.9763,
128
- "step": 272
129
- },
130
- {
131
- "epoch": 0.4768211920529801,
132
- "grad_norm": 0.10417157411575317,
133
- "learning_rate": 0.0008908559941829497,
134
- "loss": 3.9728,
135
- "step": 288
136
- },
137
- {
138
- "epoch": 0.5033112582781457,
139
- "grad_norm": 0.10544515401124954,
140
- "learning_rate": 0.0008771151211291332,
141
- "loss": 3.9765,
142
- "step": 304
143
- },
144
- {
145
- "epoch": 0.5298013245033113,
146
- "grad_norm": 0.10653045773506165,
147
- "learning_rate": 0.0008626794906756866,
148
- "loss": 3.9607,
149
- "step": 320
150
- },
151
- {
152
- "epoch": 0.5562913907284768,
153
- "grad_norm": 0.11847585439682007,
154
- "learning_rate": 0.0008475756975161504,
155
- "loss": 3.9961,
156
- "step": 336
157
- },
158
- {
159
- "epoch": 0.5827814569536424,
160
- "grad_norm": 0.1187497153878212,
161
- "learning_rate": 0.00083183156729705,
162
- "loss": 3.984,
163
- "step": 352
164
- },
165
- {
166
- "epoch": 0.609271523178808,
167
- "grad_norm": 0.10147203505039215,
168
- "learning_rate": 0.0008154761053548404,
169
- "loss": 3.9921,
170
- "step": 368
171
- },
172
- {
173
- "epoch": 0.6357615894039735,
174
- "grad_norm": 0.10544505715370178,
175
- "learning_rate": 0.000798539443279511,
176
- "loss": 3.9585,
177
- "step": 384
178
- },
179
- {
180
- "epoch": 0.6622516556291391,
181
- "grad_norm": 0.10036241263151169,
182
- "learning_rate": 0.0007810527834033009,
183
- "loss": 3.9603,
184
- "step": 400
185
- },
186
- {
187
- "epoch": 0.6887417218543046,
188
- "grad_norm": 0.11926663666963577,
189
- "learning_rate": 0.00076304834131679,
190
- "loss": 3.9728,
191
- "step": 416
192
- },
193
- {
194
- "epoch": 0.7152317880794702,
195
- "grad_norm": 0.16939891874790192,
196
- "learning_rate": 0.0007445592865182695,
197
- "loss": 3.9824,
198
- "step": 432
199
- },
200
- {
201
- "epoch": 0.7417218543046358,
202
- "grad_norm": 0.10274316370487213,
203
- "learning_rate": 0.0007256196813057318,
204
- "loss": 3.979,
205
- "step": 448
206
- },
207
- {
208
- "epoch": 0.7682119205298014,
209
- "grad_norm": 0.09025011956691742,
210
- "learning_rate": 0.0007062644180240614,
211
- "loss": 3.9383,
212
- "step": 464
213
- },
214
- {
215
- "epoch": 0.7947019867549668,
216
- "grad_norm": 0.10321378707885742,
217
- "learning_rate": 0.0006865291547830324,
218
- "loss": 3.9936,
219
- "step": 480
220
- },
221
- {
222
- "epoch": 0.8211920529801324,
223
- "grad_norm": 0.09555677324533463,
224
- "learning_rate": 0.000666450249764542,
225
- "loss": 3.9505,
226
- "step": 496
227
- },
228
- {
229
- "epoch": 0.847682119205298,
230
- "grad_norm": 0.11583372205495834,
231
- "learning_rate": 0.0006460646942401058,
232
- "loss": 3.9876,
233
- "step": 512
234
- },
235
- {
236
- "epoch": 0.8741721854304636,
237
- "grad_norm": 0.09818309545516968,
238
- "learning_rate": 0.0006254100444220115,
239
- "loss": 3.9674,
240
- "step": 528
241
- },
242
- {
243
- "epoch": 0.9006622516556292,
244
- "grad_norm": 0.12466709315776825,
245
- "learning_rate": 0.0006045243522736885,
246
- "loss": 3.9644,
247
- "step": 544
248
- },
249
- {
250
- "epoch": 0.9271523178807947,
251
- "grad_norm": 0.10327574610710144,
252
- "learning_rate": 0.0005834460954067559,
253
- "loss": 3.9836,
254
- "step": 560
255
- },
256
- {
257
- "epoch": 0.9536423841059603,
258
- "grad_norm": 0.10383777320384979,
259
- "learning_rate": 0.0005622141061939006,
260
- "loss": 3.9671,
261
- "step": 576
262
- },
263
- {
264
- "epoch": 0.9801324503311258,
265
- "grad_norm": 0.09883040934801102,
266
- "learning_rate": 0.0005408675002281818,
267
- "loss": 4.0353,
268
- "step": 592
269
- },
270
- {
271
- "epoch": 1.0,
272
- "eval_bleu": 0.08384055402908269,
273
- "eval_cap_loss": 0.8933286312104061,
274
- "eval_con_loss": 1.7258543496495051,
275
- "eval_loss": 2.6191829849552635,
276
- "step": 604
277
- },
278
- {
279
- "epoch": 1.0,
280
- "eval_bleu": 0.08384055402908269,
281
- "eval_cap_loss": 0.8933286312104061,
282
- "eval_con_loss": 1.7258543496495051,
283
- "eval_loss": 2.6191829849552635,
284
- "eval_runtime": 254.3476,
285
- "eval_samples_per_second": 18.986,
286
- "eval_steps_per_second": 2.375,
287
- "step": 604
288
- },
289
- {
290
- "epoch": 1.0066225165562914,
291
- "grad_norm": 0.14657221734523773,
292
- "learning_rate": 0.0005194456042605587,
293
- "loss": 3.9395,
294
- "step": 608
295
- },
296
- {
297
- "epoch": 1.033112582781457,
298
- "grad_norm": 0.08521195501089096,
299
- "learning_rate": 0.0004979878837484043,
300
- "loss": 3.9684,
301
- "step": 624
302
- },
303
- {
304
- "epoch": 1.0596026490066226,
305
- "grad_norm": 0.10343234986066818,
306
- "learning_rate": 0.00047653387014848014,
307
- "loss": 3.9778,
308
- "step": 640
309
- },
310
- {
311
- "epoch": 1.086092715231788,
312
- "grad_norm": 0.1191788986325264,
313
- "learning_rate": 0.0004551230880883208,
314
- "loss": 3.9832,
315
- "step": 656
316
- },
317
- {
318
- "epoch": 1.1125827814569536,
319
- "grad_norm": 0.10124525427818298,
320
- "learning_rate": 0.00043379498255020037,
321
- "loss": 3.9897,
322
- "step": 672
323
- },
324
- {
325
- "epoch": 1.1390728476821192,
326
- "grad_norm": 0.11849219352006912,
327
- "learning_rate": 0.00041258884620182804,
328
- "loss": 3.9687,
329
- "step": 688
330
- },
331
- {
332
- "epoch": 1.1655629139072847,
333
- "grad_norm": 0.12588591873645782,
334
- "learning_rate": 0.00039154374700765316,
335
- "loss": 3.9728,
336
- "step": 704
337
- },
338
- {
339
- "epoch": 1.1920529801324504,
340
- "grad_norm": 0.1349213719367981,
341
- "learning_rate": 0.00037069845625413954,
342
- "loss": 3.9878,
343
- "step": 720
344
- },
345
- {
346
- "epoch": 1.218543046357616,
347
- "grad_norm": 0.14282754063606262,
348
- "learning_rate": 0.0003500913771216081,
349
- "loss": 3.9699,
350
- "step": 736
351
- },
352
- {
353
- "epoch": 1.2450331125827814,
354
- "grad_norm": 0.11279956251382828,
355
- "learning_rate": 0.0003297604739342396,
356
- "loss": 3.9934,
357
- "step": 752
358
- },
359
- {
360
- "epoch": 1.271523178807947,
361
- "grad_norm": 0.13443419337272644,
362
- "learning_rate": 0.00030974320221858066,
363
- "loss": 3.9864,
364
- "step": 768
365
- },
366
- {
367
- "epoch": 1.2980132450331126,
368
- "grad_norm": 0.10932871699333191,
369
- "learning_rate": 0.0002900764396994049,
370
- "loss": 3.9739,
371
- "step": 784
372
- },
373
- {
374
- "epoch": 1.3245033112582782,
375
- "grad_norm": 0.13860152661800385,
376
- "learning_rate": 0.00027079641836005473,
377
- "loss": 4.007,
378
- "step": 800
379
- },
380
- {
381
- "epoch": 1.3509933774834437,
382
- "grad_norm": 0.10841521620750427,
383
- "learning_rate": 0.0002519386576924303,
384
- "loss": 3.9644,
385
- "step": 816
386
- },
387
- {
388
- "epoch": 1.3774834437086092,
389
- "grad_norm": 0.12144733220338821,
390
- "learning_rate": 0.0002335378992595995,
391
- "loss": 3.9399,
392
- "step": 832
393
- },
394
- {
395
- "epoch": 1.403973509933775,
396
- "grad_norm": 0.13902397453784943,
397
- "learning_rate": 0.0002156280426915786,
398
- "loss": 3.9591,
399
- "step": 848
400
- },
401
- {
402
- "epoch": 1.4304635761589404,
403
- "grad_norm": 0.10983491688966751,
404
- "learning_rate": 0.00019824208323220656,
405
- "loss": 3.9748,
406
- "step": 864
407
- },
408
- {
409
- "epoch": 1.4569536423841059,
410
- "grad_norm": 0.11976425349712372,
411
- "learning_rate": 0.00018141205095216294,
412
- "loss": 3.9727,
413
- "step": 880
414
- },
415
- {
416
- "epoch": 1.4834437086092715,
417
- "grad_norm": 0.10822241753339767,
418
- "learning_rate": 0.00016516895174012043,
419
- "loss": 3.9448,
420
- "step": 896
421
- },
422
- {
423
- "epoch": 1.5099337748344372,
424
- "grad_norm": 0.099924236536026,
425
- "learning_rate": 0.00014954271018074368,
426
- "loss": 4.0026,
427
- "step": 912
428
- },
429
- {
430
- "epoch": 1.5364238410596025,
431
- "grad_norm": 0.11151504516601562,
432
- "learning_rate": 0.00013456211442476813,
433
- "loss": 3.9316,
434
- "step": 928
435
- },
436
- {
437
- "epoch": 1.5629139072847682,
438
- "grad_norm": 0.09520285576581955,
439
- "learning_rate": 0.00012025476315272743,
440
- "loss": 3.9738,
441
- "step": 944
442
- },
443
- {
444
- "epoch": 1.589403973509934,
445
- "grad_norm": 0.10799151659011841,
446
- "learning_rate": 0.00010664701473003396,
447
- "loss": 3.9736,
448
- "step": 960
449
- },
450
- {
451
- "epoch": 1.6158940397350994,
452
- "grad_norm": 0.09459669888019562,
453
- "learning_rate": 9.376393864708821e-05,
454
- "loss": 3.9652,
455
- "step": 976
456
- },
457
- {
458
- "epoch": 1.6423841059602649,
459
- "grad_norm": 0.10165199637413025,
460
- "learning_rate": 8.162926933387499e-05,
461
- "loss": 3.9841,
462
- "step": 992
463
- },
464
- {
465
- "epoch": 1.6688741721854305,
466
- "grad_norm": 0.1313178986310959,
467
- "learning_rate": 7.026536243413539e-05,
468
- "loss": 3.98,
469
- "step": 1008
470
- },
471
- {
472
- "epoch": 1.695364238410596,
473
- "grad_norm": 0.1009119376540184,
474
- "learning_rate": 5.969315361967087e-05,
475
- "loss": 3.9513,
476
- "step": 1024
477
- },
478
- {
479
- "epoch": 1.7218543046357615,
480
- "grad_norm": 0.0979105681180954,
481
- "learning_rate": 4.9932120020654116e-05,
482
- "loss": 3.9915,
483
- "step": 1040
484
- },
485
- {
486
- "epoch": 1.7483443708609272,
487
- "grad_norm": 0.11330430954694748,
488
- "learning_rate": 4.100024434300437e-05,
489
- "loss": 3.917,
490
- "step": 1056
491
- },
492
- {
493
- "epoch": 1.7748344370860927,
494
- "grad_norm": 0.10902255028486252,
495
- "learning_rate": 3.2913981738933395e-05,
496
- "loss": 3.9598,
497
- "step": 1072
498
- },
499
- {
500
- "epoch": 1.8013245033112582,
501
- "grad_norm": 0.10667652636766434,
502
- "learning_rate": 2.5688229491697356e-05,
503
- "loss": 4.0058,
504
- "step": 1088
505
- },
506
- {
507
- "epoch": 1.8278145695364238,
508
- "grad_norm": 0.12808123230934143,
509
- "learning_rate": 1.9336299570401396e-05,
510
- "loss": 3.9747,
511
- "step": 1104
512
- },
513
- {
514
- "epoch": 1.8543046357615895,
515
- "grad_norm": 0.08537725359201431,
516
- "learning_rate": 1.3869894105423109e-05,
517
- "loss": 3.9497,
518
- "step": 1120
519
- },
520
- {
521
- "epoch": 1.8807947019867548,
522
- "grad_norm": 0.08839607983827591,
523
- "learning_rate": 9.299083829632516e-06,
524
- "loss": 3.9864,
525
- "step": 1136
526
- },
527
- {
528
- "epoch": 1.9072847682119205,
529
- "grad_norm": 0.1202566921710968,
530
- "learning_rate": 5.632289525129064e-06,
531
- "loss": 3.9474,
532
- "step": 1152
533
- },
534
- {
535
- "epoch": 1.9337748344370862,
536
- "grad_norm": 0.1078442707657814,
537
- "learning_rate": 2.8762665096744854e-06,
538
- "loss": 3.9471,
539
- "step": 1168
540
- },
541
- {
542
- "epoch": 1.9602649006622517,
543
- "grad_norm": 0.09251430630683899,
544
- "learning_rate": 1.036092191402882e-06,
545
- "loss": 3.9503,
546
- "step": 1184
547
- },
548
- {
549
- "epoch": 1.9867549668874172,
550
- "grad_norm": 0.12372814118862152,
551
- "learning_rate": 1.1515671473599775e-07,
552
- "loss": 4.0171,
553
- "step": 1200
554
- },
555
- {
556
- "epoch": 2.0,
557
- "eval_bleu": 0.08026931185165223,
558
- "eval_cap_loss": 0.88608129885023,
559
- "eval_con_loss": 1.7209573078234464,
560
- "eval_loss": 2.607038610226271,
561
- "step": 1208
562
- },
563
- {
564
- "epoch": 2.0,
565
- "eval_bleu": 0.08026931185165223,
566
- "eval_cap_loss": 0.88608129885023,
567
- "eval_con_loss": 1.7209573078234464,
568
- "eval_loss": 2.607038610226271,
569
- "eval_runtime": 259.1818,
570
- "eval_samples_per_second": 18.632,
571
- "eval_steps_per_second": 2.33,
572
- "step": 1208
573
- }
574
- ],
575
- "logging_steps": 16,
576
- "max_steps": 1208,
577
- "num_input_tokens_seen": 0,
578
- "num_train_epochs": 2,
579
- "save_steps": 500,
580
- "stateful_callbacks": {
581
- "TrainerControl": {
582
- "args": {
583
- "should_epoch_stop": false,
584
- "should_evaluate": false,
585
- "should_log": false,
586
- "should_save": true,
587
- "should_training_stop": true
588
- },
589
- "attributes": {}
590
- }
591
- },
592
- "total_flos": 0.0,
593
- "train_batch_size": 32,
594
- "trial_name": null,
595
- "trial_params": null
596
- }