FremyCompany
committed on
Update pick_best_tokenizer.py
Browse files - pick_best_tokenizer.py +1 -0
pick_best_tokenizer.py
CHANGED
@@ -12,6 +12,7 @@ class PickBestTokenizer(PreTrainedTokenizer):
|
|
12 |
self.tokenizers_offsets = []
|
13 |
self.vocab = {}
|
14 |
self._vocab_size = sum(len(tokenizer) for tokenizer in self.tokenizers)
|
|
|
15 |
|
16 |
offset = 0
|
17 |
for i, tokenizer in enumerate(self.tokenizers):
|
|
|
12 |
self.tokenizers_offsets = []
|
13 |
self.vocab = {}
|
14 |
self._vocab_size = sum(len(tokenizer) for tokenizer in self.tokenizers)
|
15 |
+
self.pad_token = '[0]'+self.tokenizers[0].pad_token
|
16 |
|
17 |
offset = 0
|
18 |
for i, tokenizer in enumerate(self.tokenizers):
|