add tokenizer
Browse files
 - special_tokens_map.json +15 -1
 - tokenizer.json +4 -2
 - tokenizer_config.json +16 -1
 
    	
        special_tokens_map.json
    CHANGED
    
    | 
         @@ -1 +1,15 @@ 
     | 
|
| 1 | 
         
            -
            { 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "bos_token": "<s>",
         
     | 
| 3 | 
         
            +
              "cls_token": "<s>",
         
     | 
| 4 | 
         
            +
              "eos_token": "</s>",
         
     | 
| 5 | 
         
            +
              "mask_token": {
         
     | 
| 6 | 
         
            +
                "content": "<mask>",
         
     | 
| 7 | 
         
            +
                "lstrip": true,
         
     | 
| 8 | 
         
            +
                "normalized": false,
         
     | 
| 9 | 
         
            +
                "rstrip": false,
         
     | 
| 10 | 
         
            +
                "single_word": false
         
     | 
| 11 | 
         
            +
              },
         
     | 
| 12 | 
         
            +
              "pad_token": "<pad>",
         
     | 
| 13 | 
         
            +
              "sep_token": "</s>",
         
     | 
| 14 | 
         
            +
              "unk_token": "<unk>"
         
     | 
| 15 | 
         
            +
            }
         
     | 
    	
        tokenizer.json
    CHANGED
    
    | 
         @@ -53,7 +53,8 @@ 
     | 
|
| 53 | 
         
             
              "pre_tokenizer": {
         
     | 
| 54 | 
         
             
                "type": "ByteLevel",
         
     | 
| 55 | 
         
             
                "add_prefix_space": false,
         
     | 
| 56 | 
         
            -
                "trim_offsets": true
         
     | 
| 
         | 
|
| 57 | 
         
             
              },
         
     | 
| 58 | 
         
             
              "post_processor": {
         
     | 
| 59 | 
         
             
                "type": "RobertaProcessing",
         
     | 
| 
         @@ -71,7 +72,8 @@ 
     | 
|
| 71 | 
         
             
              "decoder": {
         
     | 
| 72 | 
         
             
                "type": "ByteLevel",
         
     | 
| 73 | 
         
             
                "add_prefix_space": true,
         
     | 
| 74 | 
         
            -
                "trim_offsets": true
         
     | 
| 
         | 
|
| 75 | 
         
             
              },
         
     | 
| 76 | 
         
             
              "model": {
         
     | 
| 77 | 
         
             
                "type": "BPE",
         
     | 
| 
         | 
|
| 53 | 
         
             
              "pre_tokenizer": {
         
     | 
| 54 | 
         
             
                "type": "ByteLevel",
         
     | 
| 55 | 
         
             
                "add_prefix_space": false,
         
     | 
| 56 | 
         
            +
                "trim_offsets": true,
         
     | 
| 57 | 
         
            +
                "use_regex": true
         
     | 
| 58 | 
         
             
              },
         
     | 
| 59 | 
         
             
              "post_processor": {
         
     | 
| 60 | 
         
             
                "type": "RobertaProcessing",
         
     | 
| 
         | 
|
| 72 | 
         
             
              "decoder": {
         
     | 
| 73 | 
         
             
                "type": "ByteLevel",
         
     | 
| 74 | 
         
             
                "add_prefix_space": true,
         
     | 
| 75 | 
         
            +
                "trim_offsets": true,
         
     | 
| 76 | 
         
            +
                "use_regex": true
         
     | 
| 77 | 
         
             
              },
         
     | 
| 78 | 
         
             
              "model": {
         
     | 
| 79 | 
         
             
                "type": "BPE",
         
     | 
    	
        tokenizer_config.json
    CHANGED
    
    | 
         @@ -1 +1,16 @@ 
     | 
|
| 1 | 
         
            -
            { 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "add_prefix_space": false,
         
     | 
| 3 | 
         
            +
              "bos_token": "<s>",
         
     | 
| 4 | 
         
            +
              "cls_token": "<s>",
         
     | 
| 5 | 
         
            +
              "eos_token": "</s>",
         
     | 
| 6 | 
         
            +
              "errors": "replace",
         
     | 
| 7 | 
         
            +
              "mask_token": "<mask>",
         
     | 
| 8 | 
         
            +
              "model_max_length": 512,
         
     | 
| 9 | 
         
            +
              "name_or_path": "relbert-roberta-large-semeval2012-mask-prompt-b-nce",
         
     | 
| 10 | 
         
            +
              "pad_token": "<pad>",
         
     | 
| 11 | 
         
            +
              "sep_token": "</s>",
         
     | 
| 12 | 
         
            +
              "special_tokens_map_file": null,
         
     | 
| 13 | 
         
            +
              "tokenizer_class": "RobertaTokenizer",
         
     | 
| 14 | 
         
            +
              "trim_offsets": true,
         
     | 
| 15 | 
         
            +
              "unk_token": "<unk>"
         
     | 
| 16 | 
         
            +
            }
         
     |