---
name: wordpiece_tokenizer
config_type: preprocessor
truncation_side: right
padding_side: right
stride: 0
pad_to_multiple_of: 0
pad_token_type_id: 0
unk_token: '[UNK]'
sep_token: '[SEP]'
pad_token: '[PAD]'
cls_token: '[CLS]'
mask_token: '[MASK]'
wordpieces_prefix: '##'
vocab_size: 42000
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true