Add BOS token to config
Browse files

The tokenizer already has the BOS token `<|startoftext|>` in its vocabulary, but it is not currently set in the configuration and is therefore never used. This causes issues with several downstream libraries that depend on a BOS token being defined. This PR simply sets it.
- config.json +3 -1
config.json
CHANGED
@@ -4,7 +4,9 @@
|
|
4 |
],
|
5 |
"attention_bias": false,
|
6 |
"attention_dropout": 0.0,
|
7 |
-
"
|
|
|
|
|
8 |
"eos_token_id": 11,
|
9 |
"head_dim": 256,
|
10 |
"hidden_act": "silu",
|
|
|
4 |
],
|
5 |
"attention_bias": false,
|
6 |
"attention_dropout": 0.0,
|
7 |
+
"bos_token": "<|startoftext|>",
|
8 |
+
"bos_token_id": 10,
|
9 |
+
"eos_token": "<|endoftext|>",
|
10 |
"eos_token_id": 11,
|
11 |
"head_dim": 256,
|
12 |
"hidden_act": "silu",
|