danielhanchen commited on
Commit
7d1258f
·
verified ·
1 Parent(s): 5898ef6

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -13,5 +13,5 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|PAD▁TOKEN|>"
17
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<|▁pad▁|>"
17
  }
tokenizer.json CHANGED
@@ -7364,15 +7364,6 @@
7364
  "rstrip": false,
7365
  "normalized": true,
7366
  "special": false
7367
- },
7368
- {
7369
- "id": 128815,
7370
- "content": "<|PAD▁TOKEN|>",
7371
- "single_word": false,
7372
- "lstrip": false,
7373
- "rstrip": false,
7374
- "normalized": false,
7375
- "special": true
7376
  }
7377
  ],
7378
  "normalizer": {
 
7364
  "rstrip": false,
7365
  "normalized": true,
7366
  "special": false
 
 
 
 
 
 
 
 
 
7367
  }
7368
  ],
7369
  "normalizer": {
tokenizer_config.json CHANGED
@@ -6546,27 +6546,18 @@
6546
  "rstrip": false,
6547
  "single_word": false,
6548
  "special": false
6549
- },
6550
- "128815": {
6551
- "content": "<|PAD▁TOKEN|>",
6552
- "lstrip": false,
6553
- "normalized": false,
6554
- "rstrip": false,
6555
- "single_word": false,
6556
- "special": true
6557
  }
6558
  },
6559
  "bos_token": "<|begin▁of▁sentence|>",
6560
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
6561
  "clean_up_tokenization_spaces": false,
6562
  "eos_token": "<|end▁of▁sentence|>",
6563
  "extra_special_tokens": {},
6564
  "legacy": true,
6565
  "model_max_length": 163840,
6566
- "pad_token": "<|PAD▁TOKEN|>",
6567
- "padding_side": "left",
6568
  "sp_model_kwargs": {},
6569
- "tokenizer_class": "LlamaTokenizer",
6570
  "unk_token": null,
6571
  "use_default_system_prompt": false
6572
  }
 
6546
  "rstrip": false,
6547
  "single_word": false,
6548
  "special": false
 
 
 
 
 
 
 
 
6549
  }
6550
  },
6551
  "bos_token": "<|begin▁of▁sentence|>",
6552
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
6553
  "clean_up_tokenization_spaces": false,
6554
  "eos_token": "<|end▁of▁sentence|>",
6555
  "extra_special_tokens": {},
6556
  "legacy": true,
6557
  "model_max_length": 163840,
6558
+ "pad_token": "<|▁pad▁|>",
 
6559
  "sp_model_kwargs": {},
6560
+ "tokenizer_class": "LlamaTokenizerFast",
6561
  "unk_token": null,
6562
  "use_default_system_prompt": false
6563
  }