khopilot committed on
Commit
cab1437
·
verified ·
1 Parent(s): f72f63a

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +5 -20
tokenizer_config.json CHANGED
@@ -1,24 +1,9 @@
1
  {
2
- "tokenizer_class": "T5Tokenizer",
3
- "model_max_length": 512,
4
- "padding_side": "right",
5
- "unk_token": "<unk>",
6
- "bos_token": "<s>",
7
  "eos_token": "</s>",
 
 
8
  "pad_token": "<pad>",
9
- "additional_special_tokens": [
10
- "<mask>",
11
- "<cls>",
12
- "<sep>"
13
- ],
14
- "sp_model_kwargs": {},
15
- "add_bos_token": false,
16
- "add_eos_token": false,
17
- "clean_up_tokenization_spaces": true,
18
- "do_lower_case": false,
19
- "keep_accents": true,
20
- "legacy": true,
21
- "use_fast": true,
22
- "vocab_file": "spiece.model",
23
- "model_type": "sentencepiece"
24
  }
 
1
  {
2
+ "clean_up_tokenization_spaces": true,
 
 
 
 
3
  "eos_token": "</s>",
4
+ "extra_ids": 100,
5
+ "model_max_length": 512,
6
  "pad_token": "<pad>",
7
+ "tokenizer_class": "T5Tokenizer",
8
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  }