Yusra2677 committed on
Commit
779dc57
·
verified ·
1 Parent(s): d95ea3b

Training in progress, step 100

Browse files
adapter_config.json CHANGED
@@ -24,13 +24,13 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
 
 
 
27
  "k_proj",
28
  "q_proj",
29
- "down_proj",
30
- "up_proj",
31
- "o_proj",
32
- "v_proj",
33
- "gate_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
 
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
+ "gate_proj",
28
+ "v_proj",
29
+ "o_proj",
30
+ "down_proj",
31
  "k_proj",
32
  "q_proj",
33
+ "up_proj"
 
 
 
 
34
  ],
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:963fe96cac82dd9e11e2f20551356e2508b2f6ba81e43ef16f472332de41cf49
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23be2a5478c7136b3095ef4df282b740fab9b461e4c277e9e228728b773b861a
3
  size 159967880
trainer_log.jsonl CHANGED
@@ -1,47 +1,10 @@
1
- {"current_steps": 10, "total_steps": 464, "loss": 0.5477, "lr": 3.2142857142857144e-05, "epoch": 0.04310344827586207, "percentage": 2.16, "elapsed_time": "0:02:39", "remaining_time": "2:00:38"}
2
- {"current_steps": 20, "total_steps": 464, "loss": 0.4192, "lr": 4.99847706754774e-05, "epoch": 0.08620689655172414, "percentage": 4.31, "elapsed_time": "0:05:12", "remaining_time": "1:55:46"}
3
- {"current_steps": 30, "total_steps": 464, "loss": 0.2814, "lr": 4.9863047384206835e-05, "epoch": 0.12931034482758622, "percentage": 6.47, "elapsed_time": "0:07:50", "remaining_time": "1:53:25"}
4
- {"current_steps": 40, "total_steps": 464, "loss": 0.2082, "lr": 4.962019382530521e-05, "epoch": 0.1724137931034483, "percentage": 8.62, "elapsed_time": "0:10:32", "remaining_time": "1:51:45"}
5
- {"current_steps": 50, "total_steps": 464, "loss": 0.1599, "lr": 4.925739315689991e-05, "epoch": 0.21551724137931033, "percentage": 10.78, "elapsed_time": "0:13:08", "remaining_time": "1:48:48"}
6
- {"current_steps": 60, "total_steps": 464, "loss": 0.144, "lr": 4.877641290737884e-05, "epoch": 0.25862068965517243, "percentage": 12.93, "elapsed_time": "0:15:36", "remaining_time": "1:45:06"}
7
- {"current_steps": 70, "total_steps": 464, "loss": 0.123, "lr": 4.817959636416969e-05, "epoch": 0.3017241379310345, "percentage": 15.09, "elapsed_time": "0:18:18", "remaining_time": "1:43:05"}
8
- {"current_steps": 80, "total_steps": 464, "loss": 0.1168, "lr": 4.7469851157479177e-05, "epoch": 0.3448275862068966, "percentage": 17.24, "elapsed_time": "0:20:53", "remaining_time": "1:40:17"}
9
- {"current_steps": 90, "total_steps": 464, "loss": 0.1112, "lr": 4.665063509461097e-05, "epoch": 0.3879310344827586, "percentage": 19.4, "elapsed_time": "0:23:35", "remaining_time": "1:38:03"}
10
- {"current_steps": 100, "total_steps": 464, "loss": 0.1005, "lr": 4.572593931387604e-05, "epoch": 0.43103448275862066, "percentage": 21.55, "elapsed_time": "0:26:14", "remaining_time": "1:35:32"}
11
- {"current_steps": 110, "total_steps": 464, "loss": 0.102, "lr": 4.4700268840168045e-05, "epoch": 0.47413793103448276, "percentage": 23.71, "elapsed_time": "0:28:52", "remaining_time": "1:32:56"}
12
- {"current_steps": 120, "total_steps": 464, "loss": 0.103, "lr": 4.357862063693486e-05, "epoch": 0.5172413793103449, "percentage": 25.86, "elapsed_time": "0:31:25", "remaining_time": "1:30:04"}
13
- {"current_steps": 130, "total_steps": 464, "loss": 0.1035, "lr": 4.2366459261474933e-05, "epoch": 0.5603448275862069, "percentage": 28.02, "elapsed_time": "0:34:03", "remaining_time": "1:27:29"}
14
- {"current_steps": 140, "total_steps": 464, "loss": 0.0995, "lr": 4.1069690242163484e-05, "epoch": 0.603448275862069, "percentage": 30.17, "elapsed_time": "0:36:35", "remaining_time": "1:24:40"}
15
- {"current_steps": 150, "total_steps": 464, "loss": 0.0979, "lr": 3.969463130731183e-05, "epoch": 0.646551724137931, "percentage": 32.33, "elapsed_time": "0:39:16", "remaining_time": "1:22:13"}
16
- {"current_steps": 160, "total_steps": 464, "loss": 0.0947, "lr": 3.824798160583012e-05, "epoch": 0.6896551724137931, "percentage": 34.48, "elapsed_time": "0:41:56", "remaining_time": "1:19:41"}
17
- {"current_steps": 170, "total_steps": 464, "loss": 0.0969, "lr": 3.673678906964727e-05, "epoch": 0.7327586206896551, "percentage": 36.64, "elapsed_time": "0:44:34", "remaining_time": "1:17:05"}
18
- {"current_steps": 180, "total_steps": 464, "loss": 0.0905, "lr": 3.516841607689501e-05, "epoch": 0.7758620689655172, "percentage": 38.79, "elapsed_time": "0:47:19", "remaining_time": "1:14:40"}
19
- {"current_steps": 190, "total_steps": 464, "loss": 0.0925, "lr": 3.355050358314172e-05, "epoch": 0.8189655172413793, "percentage": 40.95, "elapsed_time": "0:49:49", "remaining_time": "1:11:51"}
20
- {"current_steps": 200, "total_steps": 464, "loss": 0.0953, "lr": 3.1890933895424976e-05, "epoch": 0.8620689655172413, "percentage": 43.1, "elapsed_time": "0:52:27", "remaining_time": "1:09:14"}
21
- {"current_steps": 210, "total_steps": 464, "loss": 0.0886, "lr": 3.0197792270443982e-05, "epoch": 0.9051724137931034, "percentage": 45.26, "elapsed_time": "0:55:10", "remaining_time": "1:06:44"}
22
- {"current_steps": 220, "total_steps": 464, "loss": 0.0914, "lr": 2.8479327524001636e-05, "epoch": 0.9482758620689655, "percentage": 47.41, "elapsed_time": "0:57:47", "remaining_time": "1:04:05"}
23
- {"current_steps": 230, "total_steps": 464, "loss": 0.0887, "lr": 2.674391184360313e-05, "epoch": 0.9913793103448276, "percentage": 49.57, "elapsed_time": "1:00:22", "remaining_time": "1:01:25"}
24
- {"current_steps": 240, "total_steps": 464, "loss": 0.0993, "lr": 2.5e-05, "epoch": 1.0344827586206897, "percentage": 51.72, "elapsed_time": "1:02:57", "remaining_time": "0:58:45"}
25
- {"current_steps": 250, "total_steps": 464, "loss": 0.0925, "lr": 2.3256088156396868e-05, "epoch": 1.0775862068965518, "percentage": 53.88, "elapsed_time": "1:05:39", "remaining_time": "0:56:12"}
26
- {"current_steps": 260, "total_steps": 464, "loss": 0.086, "lr": 2.1520672475998373e-05, "epoch": 1.1206896551724137, "percentage": 56.03, "elapsed_time": "1:08:18", "remaining_time": "0:53:35"}
27
- {"current_steps": 270, "total_steps": 464, "loss": 0.0885, "lr": 1.980220772955602e-05, "epoch": 1.1637931034482758, "percentage": 58.19, "elapsed_time": "1:10:52", "remaining_time": "0:50:55"}
28
- {"current_steps": 280, "total_steps": 464, "loss": 0.0935, "lr": 1.8109066104575023e-05, "epoch": 1.206896551724138, "percentage": 60.34, "elapsed_time": "1:13:30", "remaining_time": "0:48:18"}
29
- {"current_steps": 290, "total_steps": 464, "loss": 0.089, "lr": 1.6449496416858284e-05, "epoch": 1.25, "percentage": 62.5, "elapsed_time": "1:16:05", "remaining_time": "0:45:39"}
30
- {"current_steps": 300, "total_steps": 464, "loss": 0.0934, "lr": 1.4831583923104999e-05, "epoch": 1.293103448275862, "percentage": 64.66, "elapsed_time": "1:18:45", "remaining_time": "0:43:03"}
31
- {"current_steps": 310, "total_steps": 464, "loss": 0.0902, "lr": 1.3263210930352737e-05, "epoch": 1.3362068965517242, "percentage": 66.81, "elapsed_time": "1:21:18", "remaining_time": "0:40:23"}
32
- {"current_steps": 320, "total_steps": 464, "loss": 0.0915, "lr": 1.175201839416988e-05, "epoch": 1.3793103448275863, "percentage": 68.97, "elapsed_time": "1:23:49", "remaining_time": "0:37:43"}
33
- {"current_steps": 330, "total_steps": 464, "loss": 0.0904, "lr": 1.0305368692688174e-05, "epoch": 1.4224137931034484, "percentage": 71.12, "elapsed_time": "1:26:24", "remaining_time": "0:35:05"}
34
- {"current_steps": 340, "total_steps": 464, "loss": 0.086, "lr": 8.930309757836517e-06, "epoch": 1.4655172413793103, "percentage": 73.28, "elapsed_time": "1:29:11", "remaining_time": "0:32:31"}
35
- {"current_steps": 350, "total_steps": 464, "loss": 0.0883, "lr": 7.633540738525066e-06, "epoch": 1.5086206896551724, "percentage": 75.43, "elapsed_time": "1:31:52", "remaining_time": "0:29:55"}
36
- {"current_steps": 360, "total_steps": 464, "loss": 0.0868, "lr": 6.421379363065142e-06, "epoch": 1.5517241379310345, "percentage": 77.59, "elapsed_time": "1:34:30", "remaining_time": "0:27:18"}
37
- {"current_steps": 370, "total_steps": 464, "loss": 0.0889, "lr": 5.299731159831953e-06, "epoch": 1.5948275862068966, "percentage": 79.74, "elapsed_time": "1:37:09", "remaining_time": "0:24:40"}
38
- {"current_steps": 380, "total_steps": 464, "loss": 0.0905, "lr": 4.274060686123959e-06, "epoch": 1.6379310344827587, "percentage": 81.9, "elapsed_time": "1:39:43", "remaining_time": "0:22:02"}
39
- {"current_steps": 390, "total_steps": 464, "loss": 0.0905, "lr": 3.3493649053890326e-06, "epoch": 1.6810344827586206, "percentage": 84.05, "elapsed_time": "1:42:19", "remaining_time": "0:19:24"}
40
- {"current_steps": 400, "total_steps": 464, "loss": 0.0862, "lr": 2.5301488425208296e-06, "epoch": 1.7241379310344827, "percentage": 86.21, "elapsed_time": "1:44:54", "remaining_time": "0:16:47"}
41
- {"current_steps": 410, "total_steps": 464, "loss": 0.09, "lr": 1.8204036358303173e-06, "epoch": 1.7672413793103448, "percentage": 88.36, "elapsed_time": "1:47:32", "remaining_time": "0:14:09"}
42
- {"current_steps": 420, "total_steps": 464, "loss": 0.0839, "lr": 1.2235870926211619e-06, "epoch": 1.8103448275862069, "percentage": 90.52, "elapsed_time": "1:50:15", "remaining_time": "0:11:33"}
43
- {"current_steps": 430, "total_steps": 464, "loss": 0.088, "lr": 7.426068431000882e-07, "epoch": 1.853448275862069, "percentage": 92.67, "elapsed_time": "1:52:55", "remaining_time": "0:08:55"}
44
- {"current_steps": 440, "total_steps": 464, "loss": 0.0932, "lr": 3.7980617469479953e-07, "epoch": 1.896551724137931, "percentage": 94.83, "elapsed_time": "1:55:27", "remaining_time": "0:06:17"}
45
- {"current_steps": 450, "total_steps": 464, "loss": 0.0842, "lr": 1.3695261579316777e-07, "epoch": 1.9396551724137931, "percentage": 96.98, "elapsed_time": "1:58:04", "remaining_time": "0:03:40"}
46
- {"current_steps": 460, "total_steps": 464, "loss": 0.0894, "lr": 1.522932452260595e-08, "epoch": 1.9827586206896552, "percentage": 99.14, "elapsed_time": "2:00:41", "remaining_time": "0:01:02"}
47
- {"current_steps": 464, "total_steps": 464, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "2:01:44", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 464, "loss": 0.5476, "lr": 3.2142857142857144e-05, "epoch": 0.04310344827586207, "percentage": 2.16, "elapsed_time": "0:00:47", "remaining_time": "0:36:18"}
2
+ {"current_steps": 20, "total_steps": 464, "loss": 0.419, "lr": 4.99847706754774e-05, "epoch": 0.08620689655172414, "percentage": 4.31, "elapsed_time": "0:01:34", "remaining_time": "0:34:49"}
3
+ {"current_steps": 30, "total_steps": 464, "loss": 0.2811, "lr": 4.9863047384206835e-05, "epoch": 0.12931034482758622, "percentage": 6.47, "elapsed_time": "0:02:21", "remaining_time": "0:34:03"}
4
+ {"current_steps": 40, "total_steps": 464, "loss": 0.208, "lr": 4.962019382530521e-05, "epoch": 0.1724137931034483, "percentage": 8.62, "elapsed_time": "0:03:09", "remaining_time": "0:33:27"}
5
+ {"current_steps": 50, "total_steps": 464, "loss": 0.1597, "lr": 4.925739315689991e-05, "epoch": 0.21551724137931033, "percentage": 10.78, "elapsed_time": "0:03:56", "remaining_time": "0:32:34"}
6
+ {"current_steps": 60, "total_steps": 464, "loss": 0.1438, "lr": 4.877641290737884e-05, "epoch": 0.25862068965517243, "percentage": 12.93, "elapsed_time": "0:04:40", "remaining_time": "0:31:29"}
7
+ {"current_steps": 70, "total_steps": 464, "loss": 0.1227, "lr": 4.817959636416969e-05, "epoch": 0.3017241379310345, "percentage": 15.09, "elapsed_time": "0:05:28", "remaining_time": "0:30:51"}
8
+ {"current_steps": 80, "total_steps": 464, "loss": 0.1167, "lr": 4.7469851157479177e-05, "epoch": 0.3448275862068966, "percentage": 17.24, "elapsed_time": "0:06:15", "remaining_time": "0:30:01"}
9
+ {"current_steps": 90, "total_steps": 464, "loss": 0.1111, "lr": 4.665063509461097e-05, "epoch": 0.3879310344827586, "percentage": 19.4, "elapsed_time": "0:07:03", "remaining_time": "0:29:20"}
10
+ {"current_steps": 100, "total_steps": 464, "loss": 0.1005, "lr": 4.572593931387604e-05, "epoch": 0.43103448275862066, "percentage": 21.55, "elapsed_time": "0:07:51", "remaining_time": "0:28:35"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44ad3d9a684e0c8115d3b7721cc5b18a17cc143c5e0d2987cdc99098b0f470fe
3
  size 6225
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ec8a3538175bdd2ebda6aa6fb3c26601c1675adc5279c11d6dcb563310792f
3
  size 6225