Alawy21 committed on
Commit
23e3af8
·
verified ·
1 Parent(s): eeae366

Training in progress, step 600

Browse files
adapter_config.json CHANGED
@@ -3,7 +3,6 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
5
  "bias": "none",
6
- "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -24,16 +23,15 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
 
27
  "gate_proj",
28
  "o_proj",
29
- "k_proj",
30
  "q_proj",
31
  "v_proj",
32
- "down_proj",
33
- "up_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
- "trainable_token_indices": null,
37
  "use_dora": false,
38
  "use_rslora": false
39
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
27
+ "up_proj",
28
  "gate_proj",
29
  "o_proj",
 
30
  "q_proj",
31
  "v_proj",
32
+ "k_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
 
35
  "use_dora": false,
36
  "use_rslora": false
37
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba13506dd4f860ffe6cb14d42f5651de35ff54ab023b8a56b2dab77713dae8c2
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94540226d2e8e53f699db37526b100acef9dfebd0f5806d966b2bd48fb0cf357
3
  size 295488936
trainer_log.jsonl CHANGED
@@ -42,3 +42,42 @@
42
  {"current_steps": 390, "total_steps": 600, "loss": 0.0336, "lr": 3.317262619769368e-05, "epoch": 1.95, "percentage": 65.0, "elapsed_time": "2:54:41", "remaining_time": "1:34:04"}
43
  {"current_steps": 400, "total_steps": 600, "loss": 0.028, "lr": 3.046344357553632e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "2:58:51", "remaining_time": "1:29:25"}
44
  {"current_steps": 400, "total_steps": 600, "eval_loss": 0.04492698982357979, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "3:03:15", "remaining_time": "1:31:37"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  {"current_steps": 390, "total_steps": 600, "loss": 0.0336, "lr": 3.317262619769368e-05, "epoch": 1.95, "percentage": 65.0, "elapsed_time": "2:54:41", "remaining_time": "1:34:04"}
43
  {"current_steps": 400, "total_steps": 600, "loss": 0.028, "lr": 3.046344357553632e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "2:58:51", "remaining_time": "1:29:25"}
44
  {"current_steps": 400, "total_steps": 600, "eval_loss": 0.04492698982357979, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "3:03:15", "remaining_time": "1:31:37"}
45
+ {"current_steps": 410, "total_steps": 600, "loss": 0.0177, "lr": 2.7820366476168224e-05, "epoch": 2.05, "percentage": 68.33, "elapsed_time": "3:07:35", "remaining_time": "1:26:56"}
46
+ {"current_steps": 420, "total_steps": 600, "loss": 0.0143, "lr": 2.52523382358473e-05, "epoch": 2.1, "percentage": 70.0, "elapsed_time": "3:11:41", "remaining_time": "1:22:09"}
47
+ {"current_steps": 430, "total_steps": 600, "loss": 0.013, "lr": 2.2768048249248648e-05, "epoch": 2.15, "percentage": 71.67, "elapsed_time": "3:15:52", "remaining_time": "1:17:26"}
48
+ {"current_steps": 440, "total_steps": 600, "loss": 0.0105, "lr": 2.0375902567303472e-05, "epoch": 2.2, "percentage": 73.33, "elapsed_time": "3:19:55", "remaining_time": "1:12:42"}
49
+ {"current_steps": 450, "total_steps": 600, "loss": 0.0136, "lr": 1.80839954537836e-05, "epoch": 2.25, "percentage": 75.0, "elapsed_time": "3:24:01", "remaining_time": "1:08:00"}
50
+ {"current_steps": 460, "total_steps": 600, "loss": 0.0196, "lr": 1.5900081996875083e-05, "epoch": 2.3, "percentage": 76.67, "elapsed_time": "3:28:09", "remaining_time": "1:03:21"}
51
+ {"current_steps": 470, "total_steps": 600, "loss": 0.017, "lr": 1.3831551868414599e-05, "epoch": 2.35, "percentage": 78.33, "elapsed_time": "3:32:17", "remaining_time": "0:58:43"}
52
+ {"current_steps": 480, "total_steps": 600, "loss": 0.0254, "lr": 1.1885404319579108e-05, "epoch": 2.4, "percentage": 80.0, "elapsed_time": "3:36:33", "remaining_time": "0:54:08"}
53
+ {"current_steps": 490, "total_steps": 600, "loss": 0.0174, "lr": 1.006822449763537e-05, "epoch": 2.45, "percentage": 81.67, "elapsed_time": "3:40:45", "remaining_time": "0:49:33"}
54
+ {"current_steps": 500, "total_steps": 600, "loss": 0.013, "lr": 8.38616116388612e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "3:44:56", "remaining_time": "0:44:59"}
55
+ {"current_steps": 500, "total_steps": 600, "eval_loss": 0.048839278519153595, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "3:49:20", "remaining_time": "0:45:52"}
56
+ {"current_steps": 510, "total_steps": 600, "loss": 0.015, "lr": 6.844905888208181e-06, "epoch": 2.55, "percentage": 85.0, "elapsed_time": "3:53:27", "remaining_time": "0:41:11"}
57
+ {"current_steps": 520, "total_steps": 600, "loss": 0.0148, "lr": 5.449673790581611e-06, "epoch": 2.6, "percentage": 86.67, "elapsed_time": "3:57:35", "remaining_time": "0:36:33"}
58
+ {"current_steps": 530, "total_steps": 600, "loss": 0.0156, "lr": 4.205185894774455e-06, "epoch": 2.65, "percentage": 88.33, "elapsed_time": "4:01:38", "remaining_time": "0:31:54"}
59
+ {"current_steps": 540, "total_steps": 600, "loss": 0.015, "lr": 3.115653153892761e-06, "epoch": 2.7, "percentage": 90.0, "elapsed_time": "4:05:49", "remaining_time": "0:27:18"}
60
+ {"current_steps": 550, "total_steps": 600, "loss": 0.0144, "lr": 2.1847622018482283e-06, "epoch": 2.75, "percentage": 91.67, "elapsed_time": "4:09:54", "remaining_time": "0:22:43"}
61
+ {"current_steps": 410, "total_steps": 600, "loss": 0.0177, "lr": 2.7820366476168224e-05, "epoch": 2.05, "percentage": 68.33, "elapsed_time": "0:04:26", "remaining_time": "0:02:03"}
62
+ {"current_steps": 420, "total_steps": 600, "loss": 0.0143, "lr": 2.52523382358473e-05, "epoch": 2.1, "percentage": 70.0, "elapsed_time": "0:08:54", "remaining_time": "0:03:49"}
63
+ {"current_steps": 430, "total_steps": 600, "loss": 0.013, "lr": 2.2768048249248648e-05, "epoch": 2.15, "percentage": 71.67, "elapsed_time": "0:13:27", "remaining_time": "0:05:19"}
64
+ {"current_steps": 440, "total_steps": 600, "loss": 0.0105, "lr": 2.0375902567303472e-05, "epoch": 2.2, "percentage": 73.33, "elapsed_time": "0:17:53", "remaining_time": "0:06:30"}
65
+ {"current_steps": 450, "total_steps": 600, "loss": 0.0136, "lr": 1.80839954537836e-05, "epoch": 2.25, "percentage": 75.0, "elapsed_time": "0:22:21", "remaining_time": "0:07:27"}
66
+ {"current_steps": 460, "total_steps": 600, "loss": 0.0196, "lr": 1.5900081996875083e-05, "epoch": 2.3, "percentage": 76.67, "elapsed_time": "0:26:50", "remaining_time": "0:08:10"}
67
+ {"current_steps": 470, "total_steps": 600, "loss": 0.017, "lr": 1.3831551868414599e-05, "epoch": 2.35, "percentage": 78.33, "elapsed_time": "0:31:21", "remaining_time": "0:08:40"}
68
+ {"current_steps": 480, "total_steps": 600, "loss": 0.0254, "lr": 1.1885404319579108e-05, "epoch": 2.4, "percentage": 80.0, "elapsed_time": "0:35:59", "remaining_time": "0:08:59"}
69
+ {"current_steps": 490, "total_steps": 600, "loss": 0.0174, "lr": 1.006822449763537e-05, "epoch": 2.45, "percentage": 81.67, "elapsed_time": "0:40:35", "remaining_time": "0:09:06"}
70
+ {"current_steps": 500, "total_steps": 600, "loss": 0.013, "lr": 8.38616116388612e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:45:07", "remaining_time": "0:09:01"}
71
+ {"current_steps": 500, "total_steps": 600, "eval_loss": 0.048839278519153595, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:49:52", "remaining_time": "0:09:58"}
72
+ {"current_steps": 510, "total_steps": 600, "loss": 0.015, "lr": 6.844905888208181e-06, "epoch": 2.55, "percentage": 85.0, "elapsed_time": "0:54:22", "remaining_time": "0:09:35"}
73
+ {"current_steps": 520, "total_steps": 600, "loss": 0.0148, "lr": 5.449673790581611e-06, "epoch": 2.6, "percentage": 86.67, "elapsed_time": "0:58:51", "remaining_time": "0:09:03"}
74
+ {"current_steps": 530, "total_steps": 600, "loss": 0.0156, "lr": 4.205185894774455e-06, "epoch": 2.65, "percentage": 88.33, "elapsed_time": "1:03:16", "remaining_time": "0:08:21"}
75
+ {"current_steps": 540, "total_steps": 600, "loss": 0.015, "lr": 3.115653153892761e-06, "epoch": 2.7, "percentage": 90.0, "elapsed_time": "1:07:48", "remaining_time": "0:07:32"}
76
+ {"current_steps": 550, "total_steps": 600, "loss": 0.0144, "lr": 2.1847622018482283e-06, "epoch": 2.75, "percentage": 91.67, "elapsed_time": "1:12:16", "remaining_time": "0:06:34"}
77
+ {"current_steps": 560, "total_steps": 600, "loss": 0.0241, "lr": 1.4156628789559922e-06, "epoch": 2.8, "percentage": 93.33, "elapsed_time": "1:16:50", "remaining_time": "0:05:29"}
78
+ {"current_steps": 570, "total_steps": 600, "loss": 0.0096, "lr": 8.10957573872062e-07, "epoch": 2.85, "percentage": 95.0, "elapsed_time": "1:21:14", "remaining_time": "0:04:16"}
79
+ {"current_steps": 580, "total_steps": 600, "loss": 0.0086, "lr": 3.7269241793390085e-07, "epoch": 2.9, "percentage": 96.67, "elapsed_time": "1:25:46", "remaining_time": "0:02:57"}
80
+ {"current_steps": 590, "total_steps": 600, "loss": 0.0115, "lr": 1.0235036169963242e-07, "epoch": 2.95, "percentage": 98.33, "elapsed_time": "1:30:11", "remaining_time": "0:01:31"}
81
+ {"current_steps": 600, "total_steps": 600, "loss": 0.0116, "lr": 8.461571127882373e-10, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:34:48", "remaining_time": "0:00:00"}
82
+ {"current_steps": 600, "total_steps": 600, "eval_loss": 0.04811210185289383, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:39:32", "remaining_time": "0:00:00"}
83
+ {"current_steps": 600, "total_steps": 600, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:39:53", "remaining_time": "0:00:00"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:041fdd0b2f190d39846b1f9053b04d4db8ecee206aae73de7c3d13c36e559757
3
- size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e79a346e15c4536b7996d95efe34e9057140b3b49e67b076e936202242fc32
3
+ size 5816