{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 14, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 15, "content": "<|im_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 16, "content": "<|im_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 17, "content": "<|object_ref_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 18, "content": "<|object_ref_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 19, "content": "<|box_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 20, "content": "<|box_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 21, "content": "<|quad_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 22, "content": "<|quad_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 23, "content": "<|vision_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 24, "content": "<|vision_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 25, "content": "<|vision_pad|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 26, "content": "<|image_pad|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 27, "content": "<|video_pad|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "NFC" }, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Split", "pattern": { "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" }, "behavior": "Isolated", "invert": false }, { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": false } ] }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, ":": 10, "A": 11, "Q": 12, "Ċ": 13 }, "merges": [] } }