Upload tokenizer
Browse files- special_tokens_map.json +1 -1
- tokenizer.json +0 -9
- tokenizer_config.json +3 -12
special_tokens_map.json
CHANGED
@@ -13,5 +13,5 @@
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
-
"pad_token": "
|
17 |
}
|
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
+
"pad_token": "<|▁pad▁|>"
|
17 |
}
|
tokenizer.json
CHANGED
@@ -7364,15 +7364,6 @@
|
|
7364 |
"rstrip": false,
|
7365 |
"normalized": true,
|
7366 |
"special": false
|
7367 |
-
},
|
7368 |
-
{
|
7369 |
-
"id": 128815,
|
7370 |
-
"content": "<|PAD▁TOKEN|>",
|
7371 |
-
"single_word": false,
|
7372 |
-
"lstrip": false,
|
7373 |
-
"rstrip": false,
|
7374 |
-
"normalized": false,
|
7375 |
-
"special": true
|
7376 |
}
|
7377 |
],
|
7378 |
"normalizer": {
|
|
|
7364 |
"rstrip": false,
|
7365 |
"normalized": true,
|
7366 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7367 |
}
|
7368 |
],
|
7369 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -6546,27 +6546,18 @@
|
|
6546 |
"rstrip": false,
|
6547 |
"single_word": false,
|
6548 |
"special": false
|
6549 |
-
},
|
6550 |
-
"128815": {
|
6551 |
-
"content": "<|PAD▁TOKEN|>",
|
6552 |
-
"lstrip": false,
|
6553 |
-
"normalized": false,
|
6554 |
-
"rstrip": false,
|
6555 |
-
"single_word": false,
|
6556 |
-
"special": true
|
6557 |
}
|
6558 |
},
|
6559 |
"bos_token": "<|begin▁of▁sentence|>",
|
6560 |
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant
|
6561 |
"clean_up_tokenization_spaces": false,
|
6562 |
"eos_token": "<|end▁of▁sentence|>",
|
6563 |
"extra_special_tokens": {},
|
6564 |
"legacy": true,
|
6565 |
"model_max_length": 163840,
|
6566 |
-
"pad_token": "
|
6567 |
-
"padding_side": "left",
|
6568 |
"sp_model_kwargs": {},
|
6569 |
-
"tokenizer_class": "
|
6570 |
"unk_token": null,
|
6571 |
"use_default_system_prompt": false
|
6572 |
}
|
|
|
6546 |
"rstrip": false,
|
6547 |
"single_word": false,
|
6548 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6549 |
}
|
6550 |
},
|
6551 |
"bos_token": "<|begin▁of▁sentence|>",
|
6552 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
6553 |
"clean_up_tokenization_spaces": false,
|
6554 |
"eos_token": "<|end▁of▁sentence|>",
|
6555 |
"extra_special_tokens": {},
|
6556 |
"legacy": true,
|
6557 |
"model_max_length": 163840,
|
6558 |
+
"pad_token": "<|▁pad▁|>",
|
|
|
6559 |
"sp_model_kwargs": {},
|
6560 |
+
"tokenizer_class": "LlamaTokenizerFast",
|
6561 |
"unk_token": null,
|
6562 |
"use_default_system_prompt": false
|
6563 |
}
|