TeetouchQQ committed (verified)
Commit 1668591 · 1 Parent(s): 8b898a9

Upload tokenizer
added_tokens.json CHANGED
@@ -1,5 +1,3 @@
 {
-  "<image_soft_token>": 262144,
-  "<|eom_id|>": 262146,
-  "<|eot_id|>": 262145
+  "<image_soft_token>": 262144
 }
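
This removes the Llama-style "<|eot_id|>" (262145) and "<|eom_id|>" (262146) entries and leaves "<image_soft_token>" (262144) as the only extra added token. A minimal sketch to confirm the added vocabulary after pulling this commit; the repo id is a placeholder, substitute the actual repository path:

    from transformers import AutoTokenizer

    # Placeholder repo id -- replace with the repository this commit belongs to.
    tokenizer = AutoTokenizer.from_pretrained("TeetouchQQ/<model-name>")

    added = tokenizer.get_added_vocab()        # tokens layered on top of the base vocab
    print(added.get("<image_soft_token>"))     # expected: 262144
    print("<|eot_id|>" in added)               # expected: False
    print("<|eom_id|>" in added)               # expected: False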
special_tokens_map.json CHANGED
@@ -1,7 +1,6 @@
 {
   "additional_special_tokens": [
-    "<end_of_turn>",
-    "<|eom_id|>"
+    "<end_of_turn>"
   ],
   "boi_token": "<start_of_image>",
   "bos_token": {
@@ -13,7 +12,7 @@
   },
   "eoi_token": "<end_of_image>",
   "eos_token": {
-    "content": "<|eot_id|>",
+    "content": "<end_of_turn>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c90ceda7b4cf186df162bb142751be5a9a1d5a7e0bf8c8afe20c050f67aa1e8e
-size 33384942
+oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+size 33384568
tokenizer_config.json CHANGED
@@ -51321,34 +51321,17 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "262145": {
-      "content": "<|eot_id|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "262146": {
-      "content": "<|eom_id|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "additional_special_tokens": [
-    "<end_of_turn>",
-    "<|eom_id|>"
+    "<end_of_turn>"
   ],
   "boi_token": "<start_of_image>",
   "bos_token": "<bos>",
   "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<end_of_image>",
-  "eos_token": "<|eot_id|>",
+  "eos_token": "<end_of_turn>",
   "extra_special_tokens": {
     "boi_token": "<start_of_image>",
     "eoi_token": "<end_of_image>",
@@ -51357,7 +51340,7 @@
   "image_token": "<image_soft_token>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
-  "padding_side": "left",
+  "padding_side": "right",
   "processor_class": "Gemma3Processor",
   "sp_model_kwargs": null,
   "spaces_between_special_tokens": false,