Upload tokenizer
Browse files
- tokenizer.json +1 -6
- tokenizer_config.json +1 -5
    	
        tokenizer.json
    CHANGED
    
    | @@ -1,11 +1,6 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
              "version": "1.0",
         | 
| 3 | 
            -
              "truncation": {
         | 
| 4 | 
            -
                "direction": "Right",
         | 
| 5 | 
            -
                "max_length": 2048,
         | 
| 6 | 
            -
                "strategy": "LongestFirst",
         | 
| 7 | 
            -
                "stride": 0
         | 
| 8 | 
            -
              },
         | 
| 9 | 
             
              "padding": null,
         | 
| 10 | 
             
              "added_tokens": [
         | 
| 11 | 
             
                {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
              "version": "1.0",
         | 
| 3 | 
            +
              "truncation": null,
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 4 | 
             
              "padding": null,
         | 
| 5 | 
             
              "added_tokens": [
         | 
| 6 | 
             
                {
         | 
    	
        tokenizer_config.json
    CHANGED
    
    | @@ -305,12 +305,8 @@ | |
| 305 | 
             
              "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
         | 
| 306 | 
             
              "clean_up_tokenization_spaces": true,
         | 
| 307 | 
             
              "eos_token": "<|endoftext|>",
         | 
| 308 | 
            -
              " | 
| 309 | 
            -
              "model_max_length": 4096,
         | 
| 310 | 
             
              "pad_token": "<|endoftext|>",
         | 
| 311 | 
            -
              "stride": 0,
         | 
| 312 | 
             
              "tokenizer_class": "GPT2Tokenizer",
         | 
| 313 | 
            -
              "truncation_side": "right",
         | 
| 314 | 
            -
              "truncation_strategy": "longest_first",
         | 
| 315 | 
             
              "unk_token": "<|endoftext|>"
         | 
| 316 | 
             
            }
         | 
|  | |
| 305 | 
             
              "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
         | 
| 306 | 
             
              "clean_up_tokenization_spaces": true,
         | 
| 307 | 
             
              "eos_token": "<|endoftext|>",
         | 
| 308 | 
            +
              "model_max_length": 2048,
         | 
|  | |
| 309 | 
             
              "pad_token": "<|endoftext|>",
         | 
|  | |
| 310 | 
             
              "tokenizer_class": "GPT2Tokenizer",
         | 
|  | |
|  | |
| 311 | 
             
              "unk_token": "<|endoftext|>"
         | 
| 312 | 
             
            }
         |