Alawy21 committed on
Commit
165a034
·
verified ·
1 Parent(s): 8edc086

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,7 @@ license: apache-2.0
4
  base_model: Qwen/Qwen2-VL-2B-Instruct
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: models
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # models
17
 
18
- This model is a fine-tuned version of [Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.0481
21
 
 
4
  base_model: Qwen/Qwen2-VL-2B-Instruct
5
  tags:
6
  - llama-factory
7
+ - lora
8
  - generated_from_trainer
9
  model-index:
10
  - name: models
 
16
 
17
  # models
18
 
19
+ This model is a fine-tuned version of [Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) on the invoice_train dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.0481
22
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.04811210185289383,
4
+ "eval_runtime": 286.7587,
5
+ "eval_samples_per_second": 0.349,
6
+ "eval_steps_per_second": 0.349,
7
+ "total_flos": 3.3505112222539776e+16,
8
+ "train_loss": 0.005030520980556806,
9
+ "train_runtime": 5996.0343,
10
+ "train_samples_per_second": 0.4,
11
+ "train_steps_per_second": 0.1
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.04811210185289383,
4
+ "eval_runtime": 286.7587,
5
+ "eval_samples_per_second": 0.349,
6
+ "eval_steps_per_second": 0.349
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 3.3505112222539776e+16,
4
+ "train_loss": 0.005030520980556806,
5
+ "train_runtime": 5996.0343,
6
+ "train_samples_per_second": 0.4,
7
+ "train_steps_per_second": 0.1
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 100,
7
+ "global_step": 600,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.05,
14
+ "grad_norm": 1.7140417098999023,
15
+ "learning_rate": 1.5e-05,
16
+ "loss": 0.7338,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.1,
21
+ "grad_norm": 1.1201739311218262,
22
+ "learning_rate": 3.1666666666666666e-05,
23
+ "loss": 0.3534,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.15,
28
+ "grad_norm": 1.3740745782852173,
29
+ "learning_rate": 4.8333333333333334e-05,
30
+ "loss": 0.1776,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.2,
35
+ "grad_norm": 0.7445772886276245,
36
+ "learning_rate": 6.500000000000001e-05,
37
+ "loss": 0.1226,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.25,
42
+ "grad_norm": 0.6376151442527771,
43
+ "learning_rate": 8.166666666666667e-05,
44
+ "loss": 0.1086,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.3,
49
+ "grad_norm": 0.37051457166671753,
50
+ "learning_rate": 9.833333333333333e-05,
51
+ "loss": 0.0996,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.35,
56
+ "grad_norm": 0.5768002867698669,
57
+ "learning_rate": 9.99314767377287e-05,
58
+ "loss": 0.0959,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.4,
63
+ "grad_norm": 0.5846573114395142,
64
+ "learning_rate": 9.9694847320726e-05,
65
+ "loss": 0.0673,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.45,
70
+ "grad_norm": 0.5731233954429626,
71
+ "learning_rate": 9.929006627092299e-05,
72
+ "loss": 0.1235,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.5,
77
+ "grad_norm": 0.7275082468986511,
78
+ "learning_rate": 9.871850323926177e-05,
79
+ "loss": 0.0779,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.5,
84
+ "eval_loss": 0.0684906542301178,
85
+ "eval_runtime": 263.652,
86
+ "eval_samples_per_second": 0.379,
87
+ "eval_steps_per_second": 0.379,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 0.55,
92
+ "grad_norm": 0.5399947762489319,
93
+ "learning_rate": 9.798209221411747e-05,
94
+ "loss": 0.0591,
95
+ "step": 110
96
+ },
97
+ {
98
+ "epoch": 0.6,
99
+ "grad_norm": 0.39991146326065063,
100
+ "learning_rate": 9.708332497729378e-05,
101
+ "loss": 0.0841,
102
+ "step": 120
103
+ },
104
+ {
105
+ "epoch": 0.65,
106
+ "grad_norm": 0.4473568797111511,
107
+ "learning_rate": 9.602524267262203e-05,
108
+ "loss": 0.0823,
109
+ "step": 130
110
+ },
111
+ {
112
+ "epoch": 0.7,
113
+ "grad_norm": 0.6712111234664917,
114
+ "learning_rate": 9.481142551569318e-05,
115
+ "loss": 0.0686,
116
+ "step": 140
117
+ },
118
+ {
119
+ "epoch": 0.75,
120
+ "grad_norm": 0.3721703886985779,
121
+ "learning_rate": 9.344598067954152e-05,
122
+ "loss": 0.0658,
123
+ "step": 150
124
+ },
125
+ {
126
+ "epoch": 0.8,
127
+ "grad_norm": 0.43187215924263,
128
+ "learning_rate": 9.193352839727121e-05,
129
+ "loss": 0.0669,
130
+ "step": 160
131
+ },
132
+ {
133
+ "epoch": 0.85,
134
+ "grad_norm": 0.3821048140525818,
135
+ "learning_rate": 9.027918632864997e-05,
136
+ "loss": 0.0898,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 0.9,
141
+ "grad_norm": 0.7914325594902039,
142
+ "learning_rate": 8.848855224356839e-05,
143
+ "loss": 0.0777,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 0.95,
148
+ "grad_norm": 0.49602773785591125,
149
+ "learning_rate": 8.656768508095853e-05,
150
+ "loss": 0.0602,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.0,
155
+ "grad_norm": 0.36420896649360657,
156
+ "learning_rate": 8.452308444726249e-05,
157
+ "loss": 0.0647,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.0,
162
+ "eval_loss": 0.05108840763568878,
163
+ "eval_runtime": 263.56,
164
+ "eval_samples_per_second": 0.379,
165
+ "eval_steps_per_second": 0.379,
166
+ "step": 200
167
+ },
168
+ {
169
+ "epoch": 1.05,
170
+ "grad_norm": 0.29956021904945374,
171
+ "learning_rate": 8.236166862382163e-05,
172
+ "loss": 0.0381,
173
+ "step": 210
174
+ },
175
+ {
176
+ "epoch": 1.1,
177
+ "grad_norm": 0.3197619915008545,
178
+ "learning_rate": 8.009075115760243e-05,
179
+ "loss": 0.0478,
180
+ "step": 220
181
+ },
182
+ {
183
+ "epoch": 1.15,
184
+ "grad_norm": 0.43458399176597595,
185
+ "learning_rate": 7.771801611446858e-05,
186
+ "loss": 0.0349,
187
+ "step": 230
188
+ },
189
+ {
190
+ "epoch": 1.2,
191
+ "grad_norm": 0.3282864987850189,
192
+ "learning_rate": 7.52514920787345e-05,
193
+ "loss": 0.0309,
194
+ "step": 240
195
+ },
196
+ {
197
+ "epoch": 1.25,
198
+ "grad_norm": 0.24097760021686554,
199
+ "learning_rate": 7.269952498697734e-05,
200
+ "loss": 0.0209,
201
+ "step": 250
202
+ },
203
+ {
204
+ "epoch": 1.3,
205
+ "grad_norm": 0.31138163805007935,
206
+ "learning_rate": 7.007074988802946e-05,
207
+ "loss": 0.0414,
208
+ "step": 260
209
+ },
210
+ {
211
+ "epoch": 1.35,
212
+ "grad_norm": 0.2666899859905243,
213
+ "learning_rate": 6.737406172470657e-05,
214
+ "loss": 0.0592,
215
+ "step": 270
216
+ },
217
+ {
218
+ "epoch": 1.4,
219
+ "grad_norm": 0.513999879360199,
220
+ "learning_rate": 6.461858523613684e-05,
221
+ "loss": 0.0328,
222
+ "step": 280
223
+ },
224
+ {
225
+ "epoch": 1.45,
226
+ "grad_norm": 0.2508058547973633,
227
+ "learning_rate": 6.181364408253209e-05,
228
+ "loss": 0.0256,
229
+ "step": 290
230
+ },
231
+ {
232
+ "epoch": 1.5,
233
+ "grad_norm": 0.22155922651290894,
234
+ "learning_rate": 5.8968729296872874e-05,
235
+ "loss": 0.0292,
236
+ "step": 300
237
+ },
238
+ {
239
+ "epoch": 1.5,
240
+ "eval_loss": 0.04999532178044319,
241
+ "eval_runtime": 263.0996,
242
+ "eval_samples_per_second": 0.38,
243
+ "eval_steps_per_second": 0.38,
244
+ "step": 300
245
+ },
246
+ {
247
+ "epoch": 1.55,
248
+ "grad_norm": 0.5395333170890808,
249
+ "learning_rate": 5.6093467170257374e-05,
250
+ "loss": 0.0468,
251
+ "step": 310
252
+ },
253
+ {
254
+ "epoch": 1.6,
255
+ "grad_norm": 0.28358444571495056,
256
+ "learning_rate": 5.319758667957928e-05,
257
+ "loss": 0.0418,
258
+ "step": 320
259
+ },
260
+ {
261
+ "epoch": 1.65,
262
+ "grad_norm": 0.18217401206493378,
263
+ "learning_rate": 5.0290886567749696e-05,
264
+ "loss": 0.0227,
265
+ "step": 330
266
+ },
267
+ {
268
+ "epoch": 1.7,
269
+ "grad_norm": 0.4007870852947235,
270
+ "learning_rate": 4.738320218785281e-05,
271
+ "loss": 0.0383,
272
+ "step": 340
273
+ },
274
+ {
275
+ "epoch": 1.75,
276
+ "grad_norm": 0.20039933919906616,
277
+ "learning_rate": 4.4484372223424415e-05,
278
+ "loss": 0.0371,
279
+ "step": 350
280
+ },
281
+ {
282
+ "epoch": 1.8,
283
+ "grad_norm": 0.12333797663450241,
284
+ "learning_rate": 4.160420539746115e-05,
285
+ "loss": 0.0331,
286
+ "step": 360
287
+ },
288
+ {
289
+ "epoch": 1.85,
290
+ "grad_norm": 0.3924310803413391,
291
+ "learning_rate": 3.875244728280676e-05,
292
+ "loss": 0.0396,
293
+ "step": 370
294
+ },
295
+ {
296
+ "epoch": 1.9,
297
+ "grad_norm": 0.07961348444223404,
298
+ "learning_rate": 3.593874732621847e-05,
299
+ "loss": 0.033,
300
+ "step": 380
301
+ },
302
+ {
303
+ "epoch": 1.95,
304
+ "grad_norm": 0.5689805746078491,
305
+ "learning_rate": 3.317262619769368e-05,
306
+ "loss": 0.0336,
307
+ "step": 390
308
+ },
309
+ {
310
+ "epoch": 2.0,
311
+ "grad_norm": 0.0888177827000618,
312
+ "learning_rate": 3.046344357553632e-05,
313
+ "loss": 0.028,
314
+ "step": 400
315
+ },
316
+ {
317
+ "epoch": 2.0,
318
+ "eval_loss": 0.04492698982357979,
319
+ "eval_runtime": 263.4857,
320
+ "eval_samples_per_second": 0.38,
321
+ "eval_steps_per_second": 0.38,
322
+ "step": 400
323
+ },
324
+ {
325
+ "epoch": 2.05,
326
+ "grad_norm": 0.4522015452384949,
327
+ "learning_rate": 2.7820366476168224e-05,
328
+ "loss": 0.0177,
329
+ "step": 410
330
+ },
331
+ {
332
+ "epoch": 2.1,
333
+ "grad_norm": 0.037934061139822006,
334
+ "learning_rate": 2.52523382358473e-05,
335
+ "loss": 0.0143,
336
+ "step": 420
337
+ },
338
+ {
339
+ "epoch": 2.15,
340
+ "grad_norm": 0.19100028276443481,
341
+ "learning_rate": 2.2768048249248648e-05,
342
+ "loss": 0.013,
343
+ "step": 430
344
+ },
345
+ {
346
+ "epoch": 2.2,
347
+ "grad_norm": 0.2139115184545517,
348
+ "learning_rate": 2.0375902567303472e-05,
349
+ "loss": 0.0105,
350
+ "step": 440
351
+ },
352
+ {
353
+ "epoch": 2.25,
354
+ "grad_norm": 0.31156909465789795,
355
+ "learning_rate": 1.80839954537836e-05,
356
+ "loss": 0.0136,
357
+ "step": 450
358
+ },
359
+ {
360
+ "epoch": 2.3,
361
+ "grad_norm": 0.08543579280376434,
362
+ "learning_rate": 1.5900081996875083e-05,
363
+ "loss": 0.0196,
364
+ "step": 460
365
+ },
366
+ {
367
+ "epoch": 2.35,
368
+ "grad_norm": 0.10310215502977371,
369
+ "learning_rate": 1.3831551868414599e-05,
370
+ "loss": 0.017,
371
+ "step": 470
372
+ },
373
+ {
374
+ "epoch": 2.4,
375
+ "grad_norm": 0.2742729187011719,
376
+ "learning_rate": 1.1885404319579108e-05,
377
+ "loss": 0.0254,
378
+ "step": 480
379
+ },
380
+ {
381
+ "epoch": 2.45,
382
+ "grad_norm": 0.09017336368560791,
383
+ "learning_rate": 1.006822449763537e-05,
384
+ "loss": 0.0174,
385
+ "step": 490
386
+ },
387
+ {
388
+ "epoch": 2.5,
389
+ "grad_norm": 0.07442392408847809,
390
+ "learning_rate": 8.38616116388612e-06,
391
+ "loss": 0.013,
392
+ "step": 500
393
+ },
394
+ {
395
+ "epoch": 2.5,
396
+ "eval_loss": 0.048839278519153595,
397
+ "eval_runtime": 284.8522,
398
+ "eval_samples_per_second": 0.351,
399
+ "eval_steps_per_second": 0.351,
400
+ "step": 500
401
+ },
402
+ {
403
+ "epoch": 2.55,
404
+ "grad_norm": 0.29669731855392456,
405
+ "learning_rate": 6.844905888208181e-06,
406
+ "loss": 0.015,
407
+ "step": 510
408
+ },
409
+ {
410
+ "epoch": 2.6,
411
+ "grad_norm": 0.3099665939807892,
412
+ "learning_rate": 5.449673790581611e-06,
413
+ "loss": 0.0148,
414
+ "step": 520
415
+ },
416
+ {
417
+ "epoch": 2.65,
418
+ "grad_norm": 0.20635627210140228,
419
+ "learning_rate": 4.205185894774455e-06,
420
+ "loss": 0.0156,
421
+ "step": 530
422
+ },
423
+ {
424
+ "epoch": 2.7,
425
+ "grad_norm": 0.2796818017959595,
426
+ "learning_rate": 3.115653153892761e-06,
427
+ "loss": 0.015,
428
+ "step": 540
429
+ },
430
+ {
431
+ "epoch": 2.75,
432
+ "grad_norm": 0.15870630741119385,
433
+ "learning_rate": 2.1847622018482283e-06,
434
+ "loss": 0.0144,
435
+ "step": 550
436
+ },
437
+ {
438
+ "epoch": 2.8,
439
+ "grad_norm": 0.23704785108566284,
440
+ "learning_rate": 1.4156628789559922e-06,
441
+ "loss": 0.0241,
442
+ "step": 560
443
+ },
444
+ {
445
+ "epoch": 2.85,
446
+ "grad_norm": 0.025902193039655685,
447
+ "learning_rate": 8.10957573872062e-07,
448
+ "loss": 0.0096,
449
+ "step": 570
450
+ },
451
+ {
452
+ "epoch": 2.9,
453
+ "grad_norm": 0.04191463440656662,
454
+ "learning_rate": 3.7269241793390085e-07,
455
+ "loss": 0.0086,
456
+ "step": 580
457
+ },
458
+ {
459
+ "epoch": 2.95,
460
+ "grad_norm": 0.04461716488003731,
461
+ "learning_rate": 1.0235036169963242e-07,
462
+ "loss": 0.0115,
463
+ "step": 590
464
+ },
465
+ {
466
+ "epoch": 3.0,
467
+ "grad_norm": 0.21583612263202667,
468
+ "learning_rate": 8.461571127882373e-10,
469
+ "loss": 0.0116,
470
+ "step": 600
471
+ },
472
+ {
473
+ "epoch": 3.0,
474
+ "eval_loss": 0.04811210185289383,
475
+ "eval_runtime": 284.4632,
476
+ "eval_samples_per_second": 0.352,
477
+ "eval_steps_per_second": 0.352,
478
+ "step": 600
479
+ },
480
+ {
481
+ "epoch": 3.0,
482
+ "step": 600,
483
+ "total_flos": 3.3505112222539776e+16,
484
+ "train_loss": 0.005030520980556806,
485
+ "train_runtime": 5996.0343,
486
+ "train_samples_per_second": 0.4,
487
+ "train_steps_per_second": 0.1
488
+ }
489
+ ],
490
+ "logging_steps": 10,
491
+ "max_steps": 600,
492
+ "num_input_tokens_seen": 0,
493
+ "num_train_epochs": 3,
494
+ "save_steps": 200,
495
+ "stateful_callbacks": {
496
+ "TrainerControl": {
497
+ "args": {
498
+ "should_epoch_stop": false,
499
+ "should_evaluate": false,
500
+ "should_log": false,
501
+ "should_save": true,
502
+ "should_training_stop": true
503
+ },
504
+ "attributes": {}
505
+ }
506
+ },
507
+ "total_flos": 3.3505112222539776e+16,
508
+ "train_batch_size": 1,
509
+ "trial_name": null,
510
+ "trial_params": null
511
+ }
training_eval_loss.png ADDED
training_loss.png ADDED