Training in progress, step 441
Browse files- model.safetensors +1 -1
- trainer_log.jsonl +53 -17
- training_args.bin +1 -1
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2471645608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3ed842afad9f65ee8a46b26dad644042f929d12ffc21e037c27f95d1b371fb3
|
| 3 |
size 2471645608
|
trainer_log.jsonl
CHANGED
|
@@ -1,17 +1,53 @@
|
|
| 1 |
-
{"current_steps": 10, "total_steps":
|
| 2 |
-
{"current_steps": 20, "total_steps":
|
| 3 |
-
{"current_steps": 30, "total_steps":
|
| 4 |
-
{"current_steps": 40, "total_steps":
|
| 5 |
-
{"current_steps": 50, "total_steps":
|
| 6 |
-
{"current_steps": 50, "total_steps":
|
| 7 |
-
{"current_steps": 60, "total_steps":
|
| 8 |
-
{"current_steps": 70, "total_steps":
|
| 9 |
-
{"current_steps": 80, "total_steps":
|
| 10 |
-
{"current_steps": 90, "total_steps":
|
| 11 |
-
{"current_steps": 100, "total_steps":
|
| 12 |
-
{"current_steps": 100, "total_steps":
|
| 13 |
-
{"current_steps": 110, "total_steps":
|
| 14 |
-
{"current_steps": 120, "total_steps":
|
| 15 |
-
{"current_steps": 130, "total_steps":
|
| 16 |
-
{"current_steps": 140, "total_steps":
|
| 17 |
-
{"current_steps":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 441, "loss": 2.2601, "lr": 2.222222222222222e-06, "epoch": 0.06759611322348964, "percentage": 2.27, "elapsed_time": "0:01:03", "remaining_time": "0:45:37"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 441, "loss": 1.4215, "lr": 4.444444444444444e-06, "epoch": 0.13519222644697929, "percentage": 4.54, "elapsed_time": "0:02:03", "remaining_time": "0:43:26"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 441, "loss": 1.0486, "lr": 6.666666666666667e-06, "epoch": 0.20278833967046894, "percentage": 6.8, "elapsed_time": "0:03:07", "remaining_time": "0:42:48"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 441, "loss": 0.9049, "lr": 8.888888888888888e-06, "epoch": 0.27038445289395857, "percentage": 9.07, "elapsed_time": "0:04:08", "remaining_time": "0:41:27"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 441, "loss": 0.8427, "lr": 9.996066923030484e-06, "epoch": 0.33798056611744826, "percentage": 11.34, "elapsed_time": "0:05:10", "remaining_time": "0:40:25"}
|
| 6 |
+
{"current_steps": 50, "total_steps": 441, "eval_loss": 1.1574571132659912, "epoch": 0.33798056611744826, "percentage": 11.34, "elapsed_time": "0:05:15", "remaining_time": "0:41:08"}
|
| 7 |
+
{"current_steps": 60, "total_steps": 441, "loss": 0.7695, "lr": 9.964639423366442e-06, "epoch": 0.4055766793409379, "percentage": 13.61, "elapsed_time": "0:06:15", "remaining_time": "0:39:45"}
|
| 8 |
+
{"current_steps": 70, "total_steps": 441, "loss": 0.6954, "lr": 9.901982117093786e-06, "epoch": 0.47317279256442757, "percentage": 15.87, "elapsed_time": "0:07:18", "remaining_time": "0:38:42"}
|
| 9 |
+
{"current_steps": 80, "total_steps": 441, "loss": 0.625, "lr": 9.808489146745466e-06, "epoch": 0.5407689057879171, "percentage": 18.14, "elapsed_time": "0:08:19", "remaining_time": "0:37:34"}
|
| 10 |
+
{"current_steps": 90, "total_steps": 441, "loss": 0.5822, "lr": 9.68474862499881e-06, "epoch": 0.6083650190114068, "percentage": 20.41, "elapsed_time": "0:09:20", "remaining_time": "0:36:26"}
|
| 11 |
+
{"current_steps": 100, "total_steps": 441, "loss": 0.5411, "lr": 9.531538935183252e-06, "epoch": 0.6759611322348965, "percentage": 22.68, "elapsed_time": "0:10:24", "remaining_time": "0:35:30"}
|
| 12 |
+
{"current_steps": 100, "total_steps": 441, "eval_loss": 0.5064918994903564, "epoch": 0.6759611322348965, "percentage": 22.68, "elapsed_time": "0:10:30", "remaining_time": "0:35:49"}
|
| 13 |
+
{"current_steps": 110, "total_steps": 441, "loss": 0.5411, "lr": 9.349823834900396e-06, "epoch": 0.7435572454583862, "percentage": 24.94, "elapsed_time": "0:11:31", "remaining_time": "0:34:41"}
|
| 14 |
+
{"current_steps": 120, "total_steps": 441, "loss": 0.5097, "lr": 9.140746393556853e-06, "epoch": 0.8111533586818758, "percentage": 27.21, "elapsed_time": "0:12:33", "remaining_time": "0:33:36"}
|
| 15 |
+
{"current_steps": 130, "total_steps": 441, "loss": 0.508, "lr": 8.905621801945467e-06, "epoch": 0.8787494719053655, "percentage": 29.48, "elapsed_time": "0:13:35", "remaining_time": "0:32:30"}
|
| 16 |
+
{"current_steps": 140, "total_steps": 441, "loss": 0.5154, "lr": 8.645929099105886e-06, "epoch": 0.9463455851288551, "percentage": 31.75, "elapsed_time": "0:14:35", "remaining_time": "0:31:21"}
|
| 17 |
+
{"current_steps": 150, "total_steps": 441, "loss": 0.519, "lr": 8.363301868506264e-06, "epoch": 1.020278833967047, "percentage": 34.01, "elapsed_time": "0:15:38", "remaining_time": "0:30:20"}
|
| 18 |
+
{"current_steps": 150, "total_steps": 441, "eval_loss": 0.436057448387146, "epoch": 1.020278833967047, "percentage": 34.01, "elapsed_time": "0:15:43", "remaining_time": "0:30:31"}
|
| 19 |
+
{"current_steps": 160, "total_steps": 441, "loss": 0.3811, "lr": 8.059517962071234e-06, "epoch": 1.0878749471905365, "percentage": 36.28, "elapsed_time": "0:16:46", "remaining_time": "0:29:27"}
|
| 20 |
+
{"current_steps": 170, "total_steps": 441, "loss": 0.3806, "lr": 7.736488316696663e-06, "epoch": 1.1554710604140261, "percentage": 38.55, "elapsed_time": "0:17:45", "remaining_time": "0:28:18"}
|
| 21 |
+
{"current_steps": 180, "total_steps": 441, "loss": 0.3917, "lr": 7.396244933600285e-06, "epoch": 1.2230671736375158, "percentage": 40.82, "elapsed_time": "0:18:46", "remaining_time": "0:27:13"}
|
| 22 |
+
{"current_steps": 190, "total_steps": 441, "loss": 0.379, "lr": 7.040928096123516e-06, "epoch": 1.2906632868610055, "percentage": 43.08, "elapsed_time": "0:19:48", "remaining_time": "0:26:09"}
|
| 23 |
+
{"current_steps": 200, "total_steps": 441, "loss": 0.3662, "lr": 6.672772906390177e-06, "epoch": 1.3582594000844952, "percentage": 45.35, "elapsed_time": "0:20:50", "remaining_time": "0:25:06"}
|
| 24 |
+
{"current_steps": 200, "total_steps": 441, "eval_loss": 0.40071234107017517, "epoch": 1.3582594000844952, "percentage": 45.35, "elapsed_time": "0:20:55", "remaining_time": "0:25:13"}
|
| 25 |
+
{"current_steps": 210, "total_steps": 441, "loss": 0.3906, "lr": 6.294095225512604e-06, "epoch": 1.4258555133079849, "percentage": 47.62, "elapsed_time": "0:21:57", "remaining_time": "0:24:09"}
|
| 26 |
+
{"current_steps": 220, "total_steps": 441, "loss": 0.37, "lr": 5.907277105787513e-06, "epoch": 1.4934516265314746, "percentage": 49.89, "elapsed_time": "0:22:58", "remaining_time": "0:23:04"}
|
| 27 |
+
{"current_steps": 230, "total_steps": 441, "loss": 0.3647, "lr": 5.514751806519673e-06, "epoch": 1.5610477397549642, "percentage": 52.15, "elapsed_time": "0:24:00", "remaining_time": "0:22:01"}
|
| 28 |
+
{"current_steps": 240, "total_steps": 441, "loss": 0.3761, "lr": 5.118988487730537e-06, "epoch": 1.6286438529784537, "percentage": 54.42, "elapsed_time": "0:25:02", "remaining_time": "0:20:58"}
|
| 29 |
+
{"current_steps": 250, "total_steps": 441, "loss": 0.3679, "lr": 4.7224766780353005e-06, "epoch": 1.6962399662019434, "percentage": 56.69, "elapsed_time": "0:26:03", "remaining_time": "0:19:54"}
|
| 30 |
+
{"current_steps": 250, "total_steps": 441, "eval_loss": 0.39477795362472534, "epoch": 1.6962399662019434, "percentage": 56.69, "elapsed_time": "0:26:09", "remaining_time": "0:19:58"}
|
| 31 |
+
{"current_steps": 260, "total_steps": 441, "loss": 0.3552, "lr": 4.327710614392341e-06, "epoch": 1.763836079425433, "percentage": 58.96, "elapsed_time": "0:27:11", "remaining_time": "0:18:55"}
|
| 32 |
+
{"current_steps": 270, "total_steps": 441, "loss": 0.3635, "lr": 3.937173552235117e-06, "epoch": 1.8314321926489225, "percentage": 61.22, "elapsed_time": "0:28:11", "remaining_time": "0:17:51"}
|
| 33 |
+
{"current_steps": 280, "total_steps": 441, "loss": 0.3491, "lr": 3.553322144682737e-06, "epoch": 1.8990283058724122, "percentage": 63.49, "elapsed_time": "0:29:13", "remaining_time": "0:16:48"}
|
| 34 |
+
{"current_steps": 290, "total_steps": 441, "loss": 0.3524, "lr": 3.178570989091028e-06, "epoch": 1.966624419095902, "percentage": 65.76, "elapsed_time": "0:30:14", "remaining_time": "0:15:44"}
|
| 35 |
+
{"current_steps": 300, "total_steps": 441, "loss": 0.3176, "lr": 2.8152774381532033e-06, "epoch": 2.040557667934094, "percentage": 68.03, "elapsed_time": "0:31:20", "remaining_time": "0:14:43"}
|
| 36 |
+
{"current_steps": 300, "total_steps": 441, "eval_loss": 0.3846343159675598, "epoch": 2.040557667934094, "percentage": 68.03, "elapsed_time": "0:31:25", "remaining_time": "0:14:46"}
|
| 37 |
+
{"current_steps": 310, "total_steps": 441, "loss": 0.2279, "lr": 2.465726771095086e-06, "epoch": 2.1081537811575832, "percentage": 70.29, "elapsed_time": "0:32:27", "remaining_time": "0:13:43"}
|
| 38 |
+
{"current_steps": 320, "total_steps": 441, "loss": 0.219, "lr": 2.132117818244771e-06, "epoch": 2.175749894381073, "percentage": 72.56, "elapsed_time": "0:33:28", "remaining_time": "0:12:39"}
|
| 39 |
+
{"current_steps": 330, "total_steps": 441, "loss": 0.2136, "lr": 1.8165491294045596e-06, "epoch": 2.2433460076045626, "percentage": 74.83, "elapsed_time": "0:34:27", "remaining_time": "0:11:35"}
|
| 40 |
+
{"current_steps": 340, "total_steps": 441, "loss": 0.2164, "lr": 1.521005773032362e-06, "epoch": 2.3109421208280523, "percentage": 77.1, "elapsed_time": "0:35:27", "remaining_time": "0:10:31"}
|
| 41 |
+
{"current_steps": 350, "total_steps": 441, "loss": 0.2141, "lr": 1.2473468492715896e-06, "epoch": 2.378538234051542, "percentage": 79.37, "elapsed_time": "0:36:29", "remaining_time": "0:09:29"}
|
| 42 |
+
{"current_steps": 350, "total_steps": 441, "eval_loss": 0.4075692594051361, "epoch": 2.378538234051542, "percentage": 79.37, "elapsed_time": "0:36:34", "remaining_time": "0:09:30"}
|
| 43 |
+
{"current_steps": 360, "total_steps": 441, "loss": 0.2205, "lr": 9.972937953781985e-07, "epoch": 2.4461343472750317, "percentage": 81.63, "elapsed_time": "0:37:37", "remaining_time": "0:08:27"}
|
| 44 |
+
{"current_steps": 370, "total_steps": 441, "loss": 0.2171, "lr": 7.724195571089787e-07, "epoch": 2.5137304604985213, "percentage": 83.9, "elapsed_time": "0:38:37", "remaining_time": "0:07:24"}
|
| 45 |
+
{"current_steps": 380, "total_steps": 441, "loss": 0.206, "lr": 5.741386941879179e-07, "epoch": 2.581326573722011, "percentage": 86.17, "elapsed_time": "0:39:38", "remaining_time": "0:06:21"}
|
| 46 |
+
{"current_steps": 390, "total_steps": 441, "loss": 0.2126, "lr": 4.036984820916723e-07, "epoch": 2.6489226869455007, "percentage": 88.44, "elapsed_time": "0:40:41", "remaining_time": "0:05:19"}
|
| 47 |
+
{"current_steps": 400, "total_steps": 441, "loss": 0.2089, "lr": 2.621710661279253e-07, "epoch": 2.7165188001689904, "percentage": 90.7, "elapsed_time": "0:41:46", "remaining_time": "0:04:16"}
|
| 48 |
+
{"current_steps": 400, "total_steps": 441, "eval_loss": 0.3996022343635559, "epoch": 2.7165188001689904, "percentage": 90.7, "elapsed_time": "0:41:51", "remaining_time": "0:04:17"}
|
| 49 |
+
{"current_steps": 410, "total_steps": 441, "loss": 0.2188, "lr": 1.5044671716097414e-07, "epoch": 2.78411491339248, "percentage": 92.97, "elapsed_time": "0:42:52", "remaining_time": "0:03:14"}
|
| 50 |
+
{"current_steps": 420, "total_steps": 441, "loss": 0.2107, "lr": 6.922823140906754e-08, "epoch": 2.8517110266159698, "percentage": 95.24, "elapsed_time": "0:43:53", "remaining_time": "0:02:11"}
|
| 51 |
+
{"current_steps": 430, "total_steps": 441, "loss": 0.2084, "lr": 1.9026509541272276e-08, "epoch": 2.919307139839459, "percentage": 97.51, "elapsed_time": "0:44:53", "remaining_time": "0:01:08"}
|
| 52 |
+
{"current_steps": 440, "total_steps": 441, "loss": 0.203, "lr": 1.5734288333457692e-10, "epoch": 2.986903253062949, "percentage": 99.77, "elapsed_time": "0:45:55", "remaining_time": "0:00:06"}
|
| 53 |
+
{"current_steps": 441, "total_steps": 441, "epoch": 2.993662864385298, "percentage": 100.0, "elapsed_time": "0:46:11", "remaining_time": "0:00:00"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f97b8e1f4e13de1327598a66af5846242e032eaa159378e10828a5b918f1b538
|
| 3 |
size 7608
|