tokenizer_config.json · vpelloin/MEDIA_NLU-flaubert_base_cased at fbd9dd4e0a54b8a74661608d4b16db5359935cfd

MEDIA_NLU-flaubert_base_cased / tokenizer_config.json

Upload tokenizer

f421d6f over 1 year ago

589 Bytes

	{
	"additional_special_tokens": [
	"<special0>",
	"<special1>",
	"<special2>",
	"<special3>",
	"<special4>",
	"<special5>",
	"<special6>",
	"<special7>",
	"<special8>",
	"<special9>"
	],
	"bos_token": "<s>",
	"clean_up_tokenization_spaces": true,
	"cls_token": "</s>",
	"do_lower_case": false,
	"id2lang": null,
	"keep_accents": true,
	"lang2id": null,
	"mask_token": "<special1>",
	"model_max_length": 512,
	"pad_token": "<pad>",
	"sep_token": "</s>",
	"tokenizer_class": "FlaubertTokenizer",
	"tokenizer_file": null,
	"unk_token": "<unk>"
	}