Training in progress, step 200
Browse files- adapter_config.json +17 -0
- adapter_model.safetensors +3 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- trainer_log.jsonl +41 -0
- training_args.bin +3 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"auto_mapping": null,
|
| 3 |
+
"base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 4 |
+
"inference_mode": true,
|
| 5 |
+
"num_attention_heads": 32,
|
| 6 |
+
"num_layers": 32,
|
| 7 |
+
"num_transformer_submodules": 1,
|
| 8 |
+
"num_virtual_tokens": 100,
|
| 9 |
+
"peft_type": "PROMPT_TUNING",
|
| 10 |
+
"prompt_tuning_init": "RANDOM",
|
| 11 |
+
"prompt_tuning_init_text": null,
|
| 12 |
+
"revision": null,
|
| 13 |
+
"task_type": "CAUSAL_LM",
|
| 14 |
+
"token_dim": 4096,
|
| 15 |
+
"tokenizer_kwargs": null,
|
| 16 |
+
"tokenizer_name_or_path": null
|
| 17 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13e79d815d1d4bd58f74dfefa11bc3f414a298c218ed76e76dd1ee99d23d689f
|
| 3 |
+
size 1638528
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
|
| 3 |
+
size 587404
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
trainer_log.jsonl
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 5, "total_steps": 40000, "loss": 2.7245, "lr": 0.29999999259779675, "epoch": 0.035650623885918005, "percentage": 0.01, "elapsed_time": "0:00:06", "remaining_time": "13:39:49", "throughput": 2398.9, "total_tokens": 14752}
|
| 2 |
+
{"current_steps": 10, "total_steps": 40000, "loss": 0.3322, "lr": 0.29999996252634736, "epoch": 0.07130124777183601, "percentage": 0.03, "elapsed_time": "0:00:10", "remaining_time": "11:29:29", "throughput": 2678.8, "total_tokens": 27712}
|
| 3 |
+
{"current_steps": 15, "total_steps": 40000, "loss": 0.1813, "lr": 0.2999999093230187, "epoch": 0.10695187165775401, "percentage": 0.04, "elapsed_time": "0:00:14", "remaining_time": "10:48:48", "throughput": 2780.64, "total_tokens": 40608}
|
| 4 |
+
{"current_steps": 20, "total_steps": 40000, "loss": 0.1593, "lr": 0.299999832987819, "epoch": 0.14260249554367202, "percentage": 0.05, "elapsed_time": "0:00:19", "remaining_time": "10:37:53", "throughput": 2898.08, "total_tokens": 55488}
|
| 5 |
+
{"current_steps": 25, "total_steps": 40000, "loss": 0.1608, "lr": 0.29999973352076004, "epoch": 0.17825311942959002, "percentage": 0.06, "elapsed_time": "0:00:23", "remaining_time": "10:24:20", "throughput": 2929.91, "total_tokens": 68640}
|
| 6 |
+
{"current_steps": 30, "total_steps": 40000, "loss": 0.1488, "lr": 0.2999996109218572, "epoch": 0.21390374331550802, "percentage": 0.07, "elapsed_time": "0:00:27", "remaining_time": "10:14:03", "throughput": 2946.2, "total_tokens": 81472}
|
| 7 |
+
{"current_steps": 35, "total_steps": 40000, "loss": 0.1464, "lr": 0.2999994651911293, "epoch": 0.24955436720142601, "percentage": 0.09, "elapsed_time": "0:00:32", "remaining_time": "10:09:03", "throughput": 2974.67, "total_tokens": 95200}
|
| 8 |
+
{"current_steps": 40, "total_steps": 40000, "loss": 0.1442, "lr": 0.2999992963285989, "epoch": 0.28520499108734404, "percentage": 0.1, "elapsed_time": "0:00:36", "remaining_time": "10:07:02", "throughput": 2997.35, "total_tokens": 109280}
|
| 9 |
+
{"current_steps": 45, "total_steps": 40000, "loss": 0.1574, "lr": 0.29999910433429194, "epoch": 0.32085561497326204, "percentage": 0.11, "elapsed_time": "0:00:40", "remaining_time": "10:01:19", "throughput": 3001.15, "total_tokens": 121952}
|
| 10 |
+
{"current_steps": 50, "total_steps": 40000, "loss": 0.1479, "lr": 0.29999888920823814, "epoch": 0.35650623885918004, "percentage": 0.12, "elapsed_time": "0:00:44", "remaining_time": "9:57:43", "throughput": 3003.54, "total_tokens": 134816}
|
| 11 |
+
{"current_steps": 55, "total_steps": 40000, "loss": 0.1465, "lr": 0.29999865095047057, "epoch": 0.39215686274509803, "percentage": 0.14, "elapsed_time": "0:00:49", "remaining_time": "9:54:53", "throughput": 3010.13, "total_tokens": 147936}
|
| 12 |
+
{"current_steps": 60, "total_steps": 40000, "loss": 0.1691, "lr": 0.29999838956102604, "epoch": 0.42780748663101603, "percentage": 0.15, "elapsed_time": "0:00:53", "remaining_time": "9:54:42", "throughput": 3031.4, "total_tokens": 162496}
|
| 13 |
+
{"current_steps": 65, "total_steps": 40000, "loss": 0.1491, "lr": 0.29999810503994484, "epoch": 0.46345811051693403, "percentage": 0.16, "elapsed_time": "0:00:58", "remaining_time": "9:54:22", "throughput": 3046.98, "total_tokens": 176864}
|
| 14 |
+
{"current_steps": 70, "total_steps": 40000, "loss": 0.1632, "lr": 0.29999779738727084, "epoch": 0.49910873440285203, "percentage": 0.18, "elapsed_time": "0:01:02", "remaining_time": "9:54:09", "throughput": 3056.82, "total_tokens": 191040}
|
| 15 |
+
{"current_steps": 75, "total_steps": 40000, "loss": 0.1464, "lr": 0.29999746660305154, "epoch": 0.5347593582887701, "percentage": 0.19, "elapsed_time": "0:01:06", "remaining_time": "9:52:59", "throughput": 3058.39, "total_tokens": 204416}
|
| 16 |
+
{"current_steps": 80, "total_steps": 40000, "loss": 0.1304, "lr": 0.2999971126873379, "epoch": 0.5704099821746881, "percentage": 0.2, "elapsed_time": "0:01:11", "remaining_time": "9:51:38", "throughput": 3062.37, "total_tokens": 217856}
|
| 17 |
+
{"current_steps": 85, "total_steps": 40000, "loss": 0.1428, "lr": 0.2999967356401845, "epoch": 0.6060606060606061, "percentage": 0.21, "elapsed_time": "0:01:15", "remaining_time": "9:51:30", "throughput": 3068.42, "total_tokens": 231904}
|
| 18 |
+
{"current_steps": 90, "total_steps": 40000, "loss": 0.143, "lr": 0.29999633546164944, "epoch": 0.6417112299465241, "percentage": 0.22, "elapsed_time": "0:01:19", "remaining_time": "9:51:07", "throughput": 3073.09, "total_tokens": 245792}
|
| 19 |
+
{"current_steps": 95, "total_steps": 40000, "loss": 0.1495, "lr": 0.29999591215179444, "epoch": 0.6773618538324421, "percentage": 0.24, "elapsed_time": "0:01:24", "remaining_time": "9:51:01", "throughput": 3078.63, "total_tokens": 259904}
|
| 20 |
+
{"current_steps": 100, "total_steps": 40000, "loss": 0.143, "lr": 0.2999954657106849, "epoch": 0.7130124777183601, "percentage": 0.25, "elapsed_time": "0:01:28", "remaining_time": "9:49:16", "throughput": 3076.76, "total_tokens": 272640}
|
| 21 |
+
{"current_steps": 105, "total_steps": 40000, "loss": 0.1439, "lr": 0.2999949961383896, "epoch": 0.7486631016042781, "percentage": 0.26, "elapsed_time": "0:01:32", "remaining_time": "9:48:13", "throughput": 3074.99, "total_tokens": 285632}
|
| 22 |
+
{"current_steps": 110, "total_steps": 40000, "loss": 0.1404, "lr": 0.2999945034349809, "epoch": 0.7843137254901961, "percentage": 0.27, "elapsed_time": "0:01:37", "remaining_time": "9:48:29", "throughput": 3082.04, "total_tokens": 300096}
|
| 23 |
+
{"current_steps": 115, "total_steps": 40000, "loss": 0.1445, "lr": 0.2999939876005348, "epoch": 0.8199643493761141, "percentage": 0.29, "elapsed_time": "0:01:41", "remaining_time": "9:48:01", "throughput": 3085.91, "total_tokens": 313920}
|
| 24 |
+
{"current_steps": 120, "total_steps": 40000, "loss": 0.1404, "lr": 0.29999344863513094, "epoch": 0.8556149732620321, "percentage": 0.3, "elapsed_time": "0:01:45", "remaining_time": "9:46:46", "throughput": 3085.31, "total_tokens": 326848}
|
| 25 |
+
{"current_steps": 125, "total_steps": 40000, "loss": 0.1412, "lr": 0.2999928865388523, "epoch": 0.8912655971479501, "percentage": 0.31, "elapsed_time": "0:01:50", "remaining_time": "9:45:37", "throughput": 3082.67, "total_tokens": 339552}
|
| 26 |
+
{"current_steps": 130, "total_steps": 40000, "loss": 0.1394, "lr": 0.29999230131178567, "epoch": 0.9269162210338681, "percentage": 0.33, "elapsed_time": "0:01:54", "remaining_time": "9:44:25", "throughput": 3080.34, "total_tokens": 352192}
|
| 27 |
+
{"current_steps": 135, "total_steps": 40000, "loss": 0.1464, "lr": 0.2999916929540212, "epoch": 0.9625668449197861, "percentage": 0.34, "elapsed_time": "0:01:58", "remaining_time": "9:43:20", "throughput": 3078.62, "total_tokens": 364896}
|
| 28 |
+
{"current_steps": 140, "total_steps": 40000, "loss": 0.1442, "lr": 0.29999106146565285, "epoch": 0.9982174688057041, "percentage": 0.35, "elapsed_time": "0:02:02", "remaining_time": "9:43:15", "throughput": 3081.42, "total_tokens": 378752}
|
| 29 |
+
{"current_steps": 145, "total_steps": 40000, "loss": 0.1522, "lr": 0.29999040684677786, "epoch": 1.0285204991087344, "percentage": 0.36, "elapsed_time": "0:02:06", "remaining_time": "9:41:26", "throughput": 3077.84, "total_tokens": 390648}
|
| 30 |
+
{"current_steps": 150, "total_steps": 40000, "loss": 0.1415, "lr": 0.2999897290974972, "epoch": 1.0641711229946524, "percentage": 0.38, "elapsed_time": "0:02:11", "remaining_time": "9:40:54", "throughput": 3079.54, "total_tokens": 404024}
|
| 31 |
+
{"current_steps": 155, "total_steps": 40000, "loss": 0.1485, "lr": 0.2999890282179155, "epoch": 1.0998217468805704, "percentage": 0.39, "elapsed_time": "0:02:15", "remaining_time": "9:40:50", "throughput": 3081.4, "total_tokens": 417752}
|
| 32 |
+
{"current_steps": 160, "total_steps": 40000, "loss": 0.1374, "lr": 0.29998830420814077, "epoch": 1.1354723707664884, "percentage": 0.4, "elapsed_time": "0:02:20", "remaining_time": "9:41:12", "throughput": 3083.6, "total_tokens": 431864}
|
| 33 |
+
{"current_steps": 165, "total_steps": 40000, "loss": 0.1468, "lr": 0.2999875570682846, "epoch": 1.1711229946524064, "percentage": 0.41, "elapsed_time": "0:02:24", "remaining_time": "9:41:38", "throughput": 3089.21, "total_tokens": 446552}
|
| 34 |
+
{"current_steps": 170, "total_steps": 40000, "loss": 0.1496, "lr": 0.2999867867984623, "epoch": 1.2067736185383244, "percentage": 0.43, "elapsed_time": "0:02:28", "remaining_time": "9:41:02", "throughput": 3090.27, "total_tokens": 459832}
|
| 35 |
+
{"current_steps": 175, "total_steps": 40000, "loss": 0.1503, "lr": 0.29998599339879267, "epoch": 1.2424242424242424, "percentage": 0.44, "elapsed_time": "0:02:33", "remaining_time": "9:40:28", "throughput": 3089.68, "total_tokens": 472856}
|
| 36 |
+
{"current_steps": 180, "total_steps": 40000, "loss": 0.135, "lr": 0.29998517686939796, "epoch": 1.2780748663101604, "percentage": 0.45, "elapsed_time": "0:02:37", "remaining_time": "9:40:16", "throughput": 3089.87, "total_tokens": 486296}
|
| 37 |
+
{"current_steps": 185, "total_steps": 40000, "loss": 0.1371, "lr": 0.29998433721040413, "epoch": 1.3137254901960784, "percentage": 0.46, "elapsed_time": "0:02:41", "remaining_time": "9:40:45", "throughput": 3093.42, "total_tokens": 500856}
|
| 38 |
+
{"current_steps": 190, "total_steps": 40000, "loss": 0.1471, "lr": 0.29998347442194073, "epoch": 1.3493761140819964, "percentage": 0.47, "elapsed_time": "0:02:46", "remaining_time": "9:40:35", "throughput": 3092.43, "total_tokens": 514136}
|
| 39 |
+
{"current_steps": 195, "total_steps": 40000, "loss": 0.1452, "lr": 0.2999825885041407, "epoch": 1.3850267379679144, "percentage": 0.49, "elapsed_time": "0:02:50", "remaining_time": "9:40:08", "throughput": 3090.16, "total_tokens": 526936}
|
| 40 |
+
{"current_steps": 200, "total_steps": 40000, "loss": 0.1456, "lr": 0.29998167945714077, "epoch": 1.4206773618538324, "percentage": 0.5, "elapsed_time": "0:02:54", "remaining_time": "9:39:52", "throughput": 3090.21, "total_tokens": 540280}
|
| 41 |
+
{"current_steps": 200, "total_steps": 40000, "eval_loss": 0.1407952606678009, "epoch": 1.4206773618538324, "percentage": 0.5, "elapsed_time": "0:03:01", "remaining_time": "10:00:54", "throughput": 2982.01, "total_tokens": 540280}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a9427620fa90cddd25984a3e04a8d04223c64746fa021e84d3e58614bd9a3d1
|
| 3 |
+
size 5752
|