ZachNagengast committed on
Commit
c939efe
·
verified ·
1 Parent(s): a28b7cc

Update with backwards compatible tokenizer format

Browse files
parakeet-tdt_ctc-110m/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "argmaxinc/ctckit-pro/parakeet-tdt_ctc-110m",
3
+ "bos_token_id": 1024,
4
+ "decoder_start_token_id": 1024,
5
+ "eos_token_id": 1024,
6
+ "vocab_size": 1024
7
+ }
parakeet-tdt_ctc-110m/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
parakeet-tdt_ctc-110m/tokenizer_config.json CHANGED
@@ -22,6 +22,6 @@
22
  "model_max_length": 1000000000000000019884624838656,
23
  "pad_token": "<pad>",
24
  "processor_class": "ParakeetProcessor",
25
- "tokenizer_class": "ParakeetTokenizerFast",
26
  "unk_token": "<unk>"
27
  }
 
22
  "model_max_length": 1000000000000000019884624838656,
23
  "pad_token": "<pad>",
24
  "processor_class": "ParakeetProcessor",
25
+ "tokenizer_class": "PreTrainedTokenizer",
26
  "unk_token": "<unk>"
27
  }