Training in progress, step 100
Browse files- adapter_config.json +5 -5
- adapter_model.safetensors +1 -1
- trainer_log.jsonl +10 -47
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -24,13 +24,13 @@
|
|
24 |
"rank_pattern": {},
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
|
|
|
|
|
|
|
|
27 |
"k_proj",
|
28 |
"q_proj",
|
29 |
-
"
|
30 |
-
"up_proj",
|
31 |
-
"o_proj",
|
32 |
-
"v_proj",
|
33 |
-
"gate_proj"
|
34 |
],
|
35 |
"task_type": "CAUSAL_LM",
|
36 |
"trainable_token_indices": null,
|
|
|
24 |
"rank_pattern": {},
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
+
"gate_proj",
|
28 |
+
"v_proj",
|
29 |
+
"o_proj",
|
30 |
+
"down_proj",
|
31 |
"k_proj",
|
32 |
"q_proj",
|
33 |
+
"up_proj"
|
|
|
|
|
|
|
|
|
34 |
],
|
35 |
"task_type": "CAUSAL_LM",
|
36 |
"trainable_token_indices": null,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 159967880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23be2a5478c7136b3095ef4df282b740fab9b461e4c277e9e228728b773b861a
|
3 |
size 159967880
|
trainer_log.jsonl
CHANGED
@@ -1,47 +1,10 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 464, "loss": 0.
|
2 |
-
{"current_steps": 20, "total_steps": 464, "loss": 0.
|
3 |
-
{"current_steps": 30, "total_steps": 464, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 464, "loss": 0.
|
5 |
-
{"current_steps": 50, "total_steps": 464, "loss": 0.
|
6 |
-
{"current_steps": 60, "total_steps": 464, "loss": 0.
|
7 |
-
{"current_steps": 70, "total_steps": 464, "loss": 0.
|
8 |
-
{"current_steps": 80, "total_steps": 464, "loss": 0.
|
9 |
-
{"current_steps": 90, "total_steps": 464, "loss": 0.
|
10 |
-
{"current_steps": 100, "total_steps": 464, "loss": 0.1005, "lr": 4.572593931387604e-05, "epoch": 0.43103448275862066, "percentage": 21.55, "elapsed_time": "0:
|
11 |
-
{"current_steps": 110, "total_steps": 464, "loss": 0.102, "lr": 4.4700268840168045e-05, "epoch": 0.47413793103448276, "percentage": 23.71, "elapsed_time": "0:28:52", "remaining_time": "1:32:56"}
|
12 |
-
{"current_steps": 120, "total_steps": 464, "loss": 0.103, "lr": 4.357862063693486e-05, "epoch": 0.5172413793103449, "percentage": 25.86, "elapsed_time": "0:31:25", "remaining_time": "1:30:04"}
|
13 |
-
{"current_steps": 130, "total_steps": 464, "loss": 0.1035, "lr": 4.2366459261474933e-05, "epoch": 0.5603448275862069, "percentage": 28.02, "elapsed_time": "0:34:03", "remaining_time": "1:27:29"}
|
14 |
-
{"current_steps": 140, "total_steps": 464, "loss": 0.0995, "lr": 4.1069690242163484e-05, "epoch": 0.603448275862069, "percentage": 30.17, "elapsed_time": "0:36:35", "remaining_time": "1:24:40"}
|
15 |
-
{"current_steps": 150, "total_steps": 464, "loss": 0.0979, "lr": 3.969463130731183e-05, "epoch": 0.646551724137931, "percentage": 32.33, "elapsed_time": "0:39:16", "remaining_time": "1:22:13"}
|
16 |
-
{"current_steps": 160, "total_steps": 464, "loss": 0.0947, "lr": 3.824798160583012e-05, "epoch": 0.6896551724137931, "percentage": 34.48, "elapsed_time": "0:41:56", "remaining_time": "1:19:41"}
|
17 |
-
{"current_steps": 170, "total_steps": 464, "loss": 0.0969, "lr": 3.673678906964727e-05, "epoch": 0.7327586206896551, "percentage": 36.64, "elapsed_time": "0:44:34", "remaining_time": "1:17:05"}
|
18 |
-
{"current_steps": 180, "total_steps": 464, "loss": 0.0905, "lr": 3.516841607689501e-05, "epoch": 0.7758620689655172, "percentage": 38.79, "elapsed_time": "0:47:19", "remaining_time": "1:14:40"}
|
19 |
-
{"current_steps": 190, "total_steps": 464, "loss": 0.0925, "lr": 3.355050358314172e-05, "epoch": 0.8189655172413793, "percentage": 40.95, "elapsed_time": "0:49:49", "remaining_time": "1:11:51"}
|
20 |
-
{"current_steps": 200, "total_steps": 464, "loss": 0.0953, "lr": 3.1890933895424976e-05, "epoch": 0.8620689655172413, "percentage": 43.1, "elapsed_time": "0:52:27", "remaining_time": "1:09:14"}
|
21 |
-
{"current_steps": 210, "total_steps": 464, "loss": 0.0886, "lr": 3.0197792270443982e-05, "epoch": 0.9051724137931034, "percentage": 45.26, "elapsed_time": "0:55:10", "remaining_time": "1:06:44"}
|
22 |
-
{"current_steps": 220, "total_steps": 464, "loss": 0.0914, "lr": 2.8479327524001636e-05, "epoch": 0.9482758620689655, "percentage": 47.41, "elapsed_time": "0:57:47", "remaining_time": "1:04:05"}
|
23 |
-
{"current_steps": 230, "total_steps": 464, "loss": 0.0887, "lr": 2.674391184360313e-05, "epoch": 0.9913793103448276, "percentage": 49.57, "elapsed_time": "1:00:22", "remaining_time": "1:01:25"}
|
24 |
-
{"current_steps": 240, "total_steps": 464, "loss": 0.0993, "lr": 2.5e-05, "epoch": 1.0344827586206897, "percentage": 51.72, "elapsed_time": "1:02:57", "remaining_time": "0:58:45"}
|
25 |
-
{"current_steps": 250, "total_steps": 464, "loss": 0.0925, "lr": 2.3256088156396868e-05, "epoch": 1.0775862068965518, "percentage": 53.88, "elapsed_time": "1:05:39", "remaining_time": "0:56:12"}
|
26 |
-
{"current_steps": 260, "total_steps": 464, "loss": 0.086, "lr": 2.1520672475998373e-05, "epoch": 1.1206896551724137, "percentage": 56.03, "elapsed_time": "1:08:18", "remaining_time": "0:53:35"}
|
27 |
-
{"current_steps": 270, "total_steps": 464, "loss": 0.0885, "lr": 1.980220772955602e-05, "epoch": 1.1637931034482758, "percentage": 58.19, "elapsed_time": "1:10:52", "remaining_time": "0:50:55"}
|
28 |
-
{"current_steps": 280, "total_steps": 464, "loss": 0.0935, "lr": 1.8109066104575023e-05, "epoch": 1.206896551724138, "percentage": 60.34, "elapsed_time": "1:13:30", "remaining_time": "0:48:18"}
|
29 |
-
{"current_steps": 290, "total_steps": 464, "loss": 0.089, "lr": 1.6449496416858284e-05, "epoch": 1.25, "percentage": 62.5, "elapsed_time": "1:16:05", "remaining_time": "0:45:39"}
|
30 |
-
{"current_steps": 300, "total_steps": 464, "loss": 0.0934, "lr": 1.4831583923104999e-05, "epoch": 1.293103448275862, "percentage": 64.66, "elapsed_time": "1:18:45", "remaining_time": "0:43:03"}
|
31 |
-
{"current_steps": 310, "total_steps": 464, "loss": 0.0902, "lr": 1.3263210930352737e-05, "epoch": 1.3362068965517242, "percentage": 66.81, "elapsed_time": "1:21:18", "remaining_time": "0:40:23"}
|
32 |
-
{"current_steps": 320, "total_steps": 464, "loss": 0.0915, "lr": 1.175201839416988e-05, "epoch": 1.3793103448275863, "percentage": 68.97, "elapsed_time": "1:23:49", "remaining_time": "0:37:43"}
|
33 |
-
{"current_steps": 330, "total_steps": 464, "loss": 0.0904, "lr": 1.0305368692688174e-05, "epoch": 1.4224137931034484, "percentage": 71.12, "elapsed_time": "1:26:24", "remaining_time": "0:35:05"}
|
34 |
-
{"current_steps": 340, "total_steps": 464, "loss": 0.086, "lr": 8.930309757836517e-06, "epoch": 1.4655172413793103, "percentage": 73.28, "elapsed_time": "1:29:11", "remaining_time": "0:32:31"}
|
35 |
-
{"current_steps": 350, "total_steps": 464, "loss": 0.0883, "lr": 7.633540738525066e-06, "epoch": 1.5086206896551724, "percentage": 75.43, "elapsed_time": "1:31:52", "remaining_time": "0:29:55"}
|
36 |
-
{"current_steps": 360, "total_steps": 464, "loss": 0.0868, "lr": 6.421379363065142e-06, "epoch": 1.5517241379310345, "percentage": 77.59, "elapsed_time": "1:34:30", "remaining_time": "0:27:18"}
|
37 |
-
{"current_steps": 370, "total_steps": 464, "loss": 0.0889, "lr": 5.299731159831953e-06, "epoch": 1.5948275862068966, "percentage": 79.74, "elapsed_time": "1:37:09", "remaining_time": "0:24:40"}
|
38 |
-
{"current_steps": 380, "total_steps": 464, "loss": 0.0905, "lr": 4.274060686123959e-06, "epoch": 1.6379310344827587, "percentage": 81.9, "elapsed_time": "1:39:43", "remaining_time": "0:22:02"}
|
39 |
-
{"current_steps": 390, "total_steps": 464, "loss": 0.0905, "lr": 3.3493649053890326e-06, "epoch": 1.6810344827586206, "percentage": 84.05, "elapsed_time": "1:42:19", "remaining_time": "0:19:24"}
|
40 |
-
{"current_steps": 400, "total_steps": 464, "loss": 0.0862, "lr": 2.5301488425208296e-06, "epoch": 1.7241379310344827, "percentage": 86.21, "elapsed_time": "1:44:54", "remaining_time": "0:16:47"}
|
41 |
-
{"current_steps": 410, "total_steps": 464, "loss": 0.09, "lr": 1.8204036358303173e-06, "epoch": 1.7672413793103448, "percentage": 88.36, "elapsed_time": "1:47:32", "remaining_time": "0:14:09"}
|
42 |
-
{"current_steps": 420, "total_steps": 464, "loss": 0.0839, "lr": 1.2235870926211619e-06, "epoch": 1.8103448275862069, "percentage": 90.52, "elapsed_time": "1:50:15", "remaining_time": "0:11:33"}
|
43 |
-
{"current_steps": 430, "total_steps": 464, "loss": 0.088, "lr": 7.426068431000882e-07, "epoch": 1.853448275862069, "percentage": 92.67, "elapsed_time": "1:52:55", "remaining_time": "0:08:55"}
|
44 |
-
{"current_steps": 440, "total_steps": 464, "loss": 0.0932, "lr": 3.7980617469479953e-07, "epoch": 1.896551724137931, "percentage": 94.83, "elapsed_time": "1:55:27", "remaining_time": "0:06:17"}
|
45 |
-
{"current_steps": 450, "total_steps": 464, "loss": 0.0842, "lr": 1.3695261579316777e-07, "epoch": 1.9396551724137931, "percentage": 96.98, "elapsed_time": "1:58:04", "remaining_time": "0:03:40"}
|
46 |
-
{"current_steps": 460, "total_steps": 464, "loss": 0.0894, "lr": 1.522932452260595e-08, "epoch": 1.9827586206896552, "percentage": 99.14, "elapsed_time": "2:00:41", "remaining_time": "0:01:02"}
|
47 |
-
{"current_steps": 464, "total_steps": 464, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "2:01:44", "remaining_time": "0:00:00"}
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 464, "loss": 0.5476, "lr": 3.2142857142857144e-05, "epoch": 0.04310344827586207, "percentage": 2.16, "elapsed_time": "0:00:47", "remaining_time": "0:36:18"}
|
2 |
+
{"current_steps": 20, "total_steps": 464, "loss": 0.419, "lr": 4.99847706754774e-05, "epoch": 0.08620689655172414, "percentage": 4.31, "elapsed_time": "0:01:34", "remaining_time": "0:34:49"}
|
3 |
+
{"current_steps": 30, "total_steps": 464, "loss": 0.2811, "lr": 4.9863047384206835e-05, "epoch": 0.12931034482758622, "percentage": 6.47, "elapsed_time": "0:02:21", "remaining_time": "0:34:03"}
|
4 |
+
{"current_steps": 40, "total_steps": 464, "loss": 0.208, "lr": 4.962019382530521e-05, "epoch": 0.1724137931034483, "percentage": 8.62, "elapsed_time": "0:03:09", "remaining_time": "0:33:27"}
|
5 |
+
{"current_steps": 50, "total_steps": 464, "loss": 0.1597, "lr": 4.925739315689991e-05, "epoch": 0.21551724137931033, "percentage": 10.78, "elapsed_time": "0:03:56", "remaining_time": "0:32:34"}
|
6 |
+
{"current_steps": 60, "total_steps": 464, "loss": 0.1438, "lr": 4.877641290737884e-05, "epoch": 0.25862068965517243, "percentage": 12.93, "elapsed_time": "0:04:40", "remaining_time": "0:31:29"}
|
7 |
+
{"current_steps": 70, "total_steps": 464, "loss": 0.1227, "lr": 4.817959636416969e-05, "epoch": 0.3017241379310345, "percentage": 15.09, "elapsed_time": "0:05:28", "remaining_time": "0:30:51"}
|
8 |
+
{"current_steps": 80, "total_steps": 464, "loss": 0.1167, "lr": 4.7469851157479177e-05, "epoch": 0.3448275862068966, "percentage": 17.24, "elapsed_time": "0:06:15", "remaining_time": "0:30:01"}
|
9 |
+
{"current_steps": 90, "total_steps": 464, "loss": 0.1111, "lr": 4.665063509461097e-05, "epoch": 0.3879310344827586, "percentage": 19.4, "elapsed_time": "0:07:03", "remaining_time": "0:29:20"}
|
10 |
+
{"current_steps": 100, "total_steps": 464, "loss": 0.1005, "lr": 4.572593931387604e-05, "epoch": 0.43103448275862066, "percentage": 21.55, "elapsed_time": "0:07:51", "remaining_time": "0:28:35"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6225
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4ec8a3538175bdd2ebda6aa6fb3c26601c1675adc5279c11d6dcb563310792f
|
3 |
size 6225
|