{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 3 ], "tokens": [ "" ] } } }, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "WordLevel", "vocab": { "": 0, "": 1, "": 2, "": 3, "C": 4, ".": 5, "1": 6, "c": 7, "O": 8, "=": 9, "(": 10, ")": 11, "N": 12, "5": 13, "6": 14, "4": 15, "7": 16, "8": 17, "3": 18, "2": 19, "n": 20, "9": 21, "F": 22, "%10": 23, "%11": 24, "S": 25, "s": 26, "Cl": 27, "[nH]": 28, "o": 29, "#": 30, "Br": 31, "%12": 32, "[N+]": 33, "[O-]": 34, "%13": 35, "I": 36, "[N-]": 37, "P": 38, "[n+]": 39, "%14": 40, "-": 41, "[Si]": 42, "[S+]": 43, "%15": 44, "B": 45, "%16": 46, "[NH+]": 47, "[B-]": 48, "%17": 49, "[NH2+]": 50, "[O]": 51, "[NH3+]": 52, "[PH]": 53, "[n-]": 54, "%18": 55, "[nH+]": 56, "[Sn]": 57, "[s+]": 58, "%19": 59, "[Se]": 60, "[Cl-]": 61, "%20": 62, "[N]": 63, "[C-]": 64, "[C]": 65, "[SiH]": 66, "%21": 67, "[O+]": 68, "[SH]": 69, "[NH]": 70, "[P+]": 71, "[c-]": 72, "[o+]": 73 }, "unk_token": "" } }