fix(tokenizer): expose `errors`
Browse files
tokenization_arcade100k.py
CHANGED
@@ -111,6 +111,8 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
         **kwargs,
     ):
         super().__init__(errors=errors, **kwargs)
+        self.errors = errors
+
         self._tiktoken_config = _arcade100k(vocab_file)
         self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
 