Motif-2.6B / tokenizer_config.json
SungminLee's picture
Upload folder using huggingface_hub
ec03c1a verified
raw
history blame
24.1 kB
{
"add_prefix_space": false,
"added_tokens_decoder": {
"219395": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219396": {
"content": "<|beginoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219397": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219398": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219399": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219400": {
"content": "<|system|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219401": {
"content": "<|user|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219402": {
"content": "<|assistant|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219403": {
"content": "<|startofturn|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219404": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219405": {
"content": "<|endofturn|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219406": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219407": {
"content": "<|dummy_id_3|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219408": {
"content": "<|dummy_id_4|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219409": {
"content": "<|dummy_id_5|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219410": {
"content": "<|dummy_id_6|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219411": {
"content": "<|dummy_id_7|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219412": {
"content": "<|dummy_id_8|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219413": {
"content": "<|dummy_id_9|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219414": {
"content": "<|dummy_id_10|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219415": {
"content": "<|dummy_id_11|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219416": {
"content": "<|endofprompt|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219417": {
"content": "<|dummy_id_12|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219418": {
"content": "<|dummy_id_13|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219419": {
"content": "<|dummy_id_14|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219420": {
"content": "<|dummy_id_15|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219421": {
"content": "<|dummy_id_16|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219422": {
"content": "<|dummy_id_17|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219423": {
"content": "<|dummy_id_18|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219424": {
"content": "<|dummy_id_19|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219425": {
"content": "<|dummy_id_20|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219426": {
"content": "<|dummy_id_21|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219427": {
"content": "<|dummy_id_22|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219428": {
"content": "<|dummy_id_23|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219429": {
"content": "<|dummy_id_24|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219430": {
"content": "<|dummy_id_25|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219431": {
"content": "<|dummy_id_26|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219432": {
"content": "<|dummy_id_27|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219433": {
"content": "<|dummy_id_28|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219434": {
"content": "<|dummy_id_29|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219435": {
"content": "<|dummy_id_30|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219436": {
"content": "<|dummy_id_31|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219437": {
"content": "<|dummy_id_32|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219438": {
"content": "<|dummy_id_33|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219439": {
"content": "<|dummy_id_34|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219440": {
"content": "<|dummy_id_35|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219441": {
"content": "<|dummy_id_36|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219442": {
"content": "<|dummy_id_37|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219443": {
"content": "<|dummy_id_38|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219444": {
"content": "<|dummy_id_39|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219445": {
"content": "<|dummy_id_40|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219446": {
"content": "<|dummy_id_41|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219447": {
"content": "<|dummy_id_42|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219448": {
"content": "<|dummy_id_43|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219449": {
"content": "<|dummy_id_44|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219450": {
"content": "<|dummy_id_45|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219451": {
"content": "<|dummy_id_46|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219452": {
"content": "<|dummy_id_47|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219453": {
"content": "<|dummy_id_48|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219454": {
"content": "<|dummy_id_49|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219455": {
"content": "<|dummy_id_50|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219456": {
"content": "<|dummy_id_51|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219457": {
"content": "<|dummy_id_52|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219458": {
"content": "<|dummy_id_53|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219459": {
"content": "<|dummy_id_54|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219460": {
"content": "<|dummy_id_55|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219461": {
"content": "<|dummy_id_56|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219462": {
"content": "<|dummy_id_57|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219463": {
"content": "<|dummy_id_58|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219464": {
"content": "<|dummy_id_59|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219465": {
"content": "<|dummy_id_60|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219466": {
"content": "<|dummy_id_61|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219467": {
"content": "<|dummy_id_62|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219468": {
"content": "<|dummy_id_63|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219469": {
"content": "<|dummy_id_64|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219470": {
"content": "<|dummy_id_65|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219471": {
"content": "<|dummy_id_66|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219472": {
"content": "<|dummy_id_67|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219473": {
"content": "<|dummy_id_68|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219474": {
"content": "<|dummy_id_69|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219475": {
"content": "<|dummy_id_70|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219476": {
"content": "<|dummy_id_71|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219477": {
"content": "<|dummy_id_72|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219478": {
"content": "<|dummy_id_73|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219479": {
"content": "<|dummy_id_74|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219480": {
"content": "<|dummy_id_75|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219481": {
"content": "<|dummy_id_76|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219482": {
"content": "<|dummy_id_77|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219483": {
"content": "<|dummy_id_78|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219484": {
"content": "<|dummy_id_79|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219485": {
"content": "<|dummy_id_80|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219486": {
"content": "<|dummy_id_81|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219487": {
"content": "<|dummy_id_82|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219488": {
"content": "<|dummy_id_83|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219489": {
"content": "<|dummy_id_84|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219490": {
"content": "<|dummy_id_85|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219491": {
"content": "<|dummy_id_86|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219492": {
"content": "<|dummy_id_87|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219493": {
"content": "<|dummy_id_88|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219494": {
"content": "<|dummy_id_89|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219495": {
"content": "<|dummy_id_90|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219496": {
"content": "<|dummy_id_91|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219497": {
"content": "<|dummy_id_92|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219498": {
"content": "<|dummy_id_93|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219499": {
"content": "<|dummy_id_94|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219500": {
"content": "<|dummy_id_95|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219501": {
"content": "<|dummy_id_96|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219502": {
"content": "<|dummy_id_97|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219503": {
"content": "<|dummy_id_98|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219504": {
"content": "<|dummy_id_99|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219505": {
"content": "<|dummy_id_100|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219506": {
"content": "<|dummy_id_101|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219507": {
"content": "<|dummy_id_102|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219508": {
"content": "<|dummy_id_103|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219509": {
"content": "<|dummy_id_104|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219510": {
"content": "<|dummy_id_105|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219511": {
"content": "<|dummy_id_106|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219512": {
"content": "<|dummy_id_107|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219513": {
"content": "<|dummy_id_108|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219514": {
"content": "<|dummy_id_109|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219515": {
"content": "<|dummy_id_110|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219516": {
"content": "<|dummy_id_111|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219517": {
"content": "<|dummy_id_112|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219518": {
"content": "<|dummy_id_113|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"219519": {
"content": "<|dummy_id_114|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"block_size": 2048,
"bos_token": "<|beginoftext|>",
"chat_template": "{{ bos_token }}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'assistant' and '</think>' in content %}{% set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}{% set content = content.split('</think>')[-1].lstrip('\n') %}{{ '<|startofturn|><|assistant|>\n\n<think>\n' + reasoning_content + '\n</think>\n\n' + content + '<|endofturn|>' }}{% else %}{{ '<|startofturn|><|' + message['role'] + '|>\n\n' + content + '<|endofturn|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|startofturn|><|assistant|>\n\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"corruption_rate": 0.15,
"eos_token": "<|endoftext|>",
"extra_ids": 0,
"extra_special_tokens": {},
"fixed_vocab": true,
"merges_file_path": "./data/merges.txt",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"seq_length": 2048,
"tokenizer_class": "GPT2Tokenizer",
"tokenizer_name": "/nfs-ssd/motif_1/tokenizers/ver5",
"tokens": -1,
"unk_token": "<|endoftext|>",
"update_tokenizer": false,
"use_moreh_tokenizer": false,
"vocab_file_path": "./data/vocab.json",
"vocab_size": 219395
}