10zinten
committed on
Commit
•
67137cd
1
Parent(s):
8071c07
add onnx
Browse files
- config.json +2 -3
- generation_config.json +1 -1
- onnx/decoder_model.onnx +3 -0
- onnx/decoder_model_bnb4.onnx +3 -0
- onnx/decoder_model_fp16.onnx +3 -0
- onnx/decoder_model_int8.onnx +3 -0
- onnx/decoder_model_q4.onnx +3 -0
- onnx/decoder_model_q4f16.onnx +3 -0
- onnx/decoder_model_quantized.onnx +3 -0
- onnx/decoder_model_uint8.onnx +3 -0
- onnx/decoder_postnet_and_vocoder.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_bnb4.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_fp16.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_int8.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_q4.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_q4f16.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_quantized.onnx +3 -0
- onnx/decoder_postnet_and_vocoder_uint8.onnx +3 -0
- onnx/decoder_with_past_model.onnx +3 -0
- onnx/decoder_with_past_model_bnb4.onnx +3 -0
- onnx/decoder_with_past_model_fp16.onnx +3 -0
- onnx/decoder_with_past_model_int8.onnx +3 -0
- onnx/decoder_with_past_model_q4.onnx +3 -0
- onnx/decoder_with_past_model_q4f16.onnx +3 -0
- onnx/decoder_with_past_model_quantized.onnx +3 -0
- onnx/decoder_with_past_model_uint8.onnx +3 -0
- onnx/encoder_model.onnx +3 -0
- onnx/encoder_model_bnb4.onnx +3 -0
- onnx/encoder_model_fp16.onnx +3 -0
- onnx/encoder_model_int8.onnx +3 -0
- onnx/encoder_model_q4.onnx +3 -0
- onnx/encoder_model_q4f16.onnx +3 -0
- onnx/encoder_model_quantized.onnx +3 -0
- onnx/encoder_model_uint8.onnx +3 -0
- quantize_config.json +17 -0
- tokenizer.json +231 -0
- tokenizer_config.json +51 -0
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_dropout": 0.1,
|
4 |
"apply_spec_augment": true,
|
5 |
"architectures": [
|
@@ -84,8 +84,7 @@
|
|
84 |
"speech_decoder_prenet_dropout": 0.5,
|
85 |
"speech_decoder_prenet_layers": 2,
|
86 |
"speech_decoder_prenet_units": 256,
|
87 |
-
"
|
88 |
-
"transformers_version": "4.33.0.dev0",
|
89 |
"use_cache": false,
|
90 |
"use_guided_attention_loss": true,
|
91 |
"vocab_size": 81
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "openpecha/speecht5-tts-01",
|
3 |
"activation_dropout": 0.1,
|
4 |
"apply_spec_augment": true,
|
5 |
"architectures": [
|
|
|
84 |
"speech_decoder_prenet_dropout": 0.5,
|
85 |
"speech_decoder_prenet_layers": 2,
|
86 |
"speech_decoder_prenet_units": 256,
|
87 |
+
"transformers_version": "4.43.4",
|
|
|
88 |
"use_cache": false,
|
89 |
"use_guided_attention_loss": true,
|
90 |
"vocab_size": 81
|
generation_config.json
CHANGED
@@ -5,5 +5,5 @@
|
|
5 |
"eos_token_id": 2,
|
6 |
"max_length": 1876,
|
7 |
"pad_token_id": 1,
|
8 |
-
"transformers_version": "4.
|
9 |
}
|
|
|
5 |
"eos_token_id": 2,
|
6 |
"max_length": 1876,
|
7 |
"pad_token_id": 1,
|
8 |
+
"transformers_version": "4.43.4"
|
9 |
}
|
onnx/decoder_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5861f41a72751e029b6393b15211f08e93396f3b71fa77932cd8c688c07c669
|
3 |
+
size 238389463
|
onnx/decoder_model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75965505976673c4e0f78e920078b7ae862e6e6c61eaf27640f99d5bfe4dd4ce
|
3 |
+
size 38979403
|
onnx/decoder_model_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e76cc47e78a7861a1746e1002969fa58bd5b1c79ff40709d79e4106b4ff38533
|
3 |
+
size 119305248
|
onnx/decoder_model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:823e31c2b3966968d28492b05788a00f5d4f53ea4e7ef1d8b8086cc0583a4855
|
3 |
+
size 64743535
|
onnx/decoder_model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e3353cacdf5535b891faf6fa779ac72b9a5d9709c204e05ec618477c142e09d
|
3 |
+
size 42607312
|
onnx/decoder_model_q4f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89e2155435bc07c1683b38a459886ac9badacbc4d89873829285c205e403c460
|
3 |
+
size 35923480
|
onnx/decoder_model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:823e31c2b3966968d28492b05788a00f5d4f53ea4e7ef1d8b8086cc0583a4855
|
3 |
+
size 64743535
|
onnx/decoder_model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e87f23d34ae2bfecb3301537613767cfa41c0281017b1eeb3433417fbadd8b2e
|
3 |
+
size 64743535
|
onnx/decoder_postnet_and_vocoder.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbd965cb53a619b9820021710da77705bc5e08a264c1ced65d9f593678d7f13f
|
3 |
+
size 55455058
|
onnx/decoder_postnet_and_vocoder_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:509fb2a028bed47ab8afa21c70244d5ff1616348ea314d2e16a2da213c4f09ac
|
3 |
+
size 55455077
|
onnx/decoder_postnet_and_vocoder_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f23dac8d29895f90ec03b7b03faa60f54f31d4ec405220e479736faaf16b0829
|
3 |
+
size 27759944
|
onnx/decoder_postnet_and_vocoder_int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c97a30c295be29e1e06faef052be223ebbbcf163d20caa2f990aba929fcb2543
|
3 |
+
size 18254997
|
onnx/decoder_postnet_and_vocoder_q4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:509fb2a028bed47ab8afa21c70244d5ff1616348ea314d2e16a2da213c4f09ac
|
3 |
+
size 55455077
|
onnx/decoder_postnet_and_vocoder_q4f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0db130e43cf870bede3f3cec1d064164c9abdbdf0978896daf47e6c43da4b089
|
3 |
+
size 27759963
|
onnx/decoder_postnet_and_vocoder_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:517a7a241890dbb33388cc9f09907771ac0411882d7c209e1c999246eda60ead
|
3 |
+
size 18254997
|
onnx/decoder_postnet_and_vocoder_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:517a7a241890dbb33388cc9f09907771ac0411882d7c209e1c999246eda60ead
|
3 |
+
size 18254997
|
onnx/decoder_with_past_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:314c8c93212125832988780bd65b2fbf7b244dd8197aac5069c96c3c79a3c9da
|
3 |
+
size 210030510
|
onnx/decoder_with_past_model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:397236d5cb7432415ee0fe93376e0e3c9de1e75ce96f6481d2300ed052c3697c
|
3 |
+
size 34948830
|
onnx/decoder_with_past_model_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bcaad87f2b1aba003f93dc2ec867e7256938a4d0a7758169fba04e9e34ef919
|
3 |
+
size 105123157
|
onnx/decoder_with_past_model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:405ea15db995fe2bf5122c98cf6c117fdd75c0e5ebbd892fd52809a94d76e744
|
3 |
+
size 57560019
|
onnx/decoder_with_past_model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5961f1cb633809a1b0965417e9dff848c9966aa8c785dc198196baaa0e6625be
|
3 |
+
size 38134455
|
onnx/decoder_with_past_model_q4f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0aff61cda451eb260e1a63b523bdd9cddf3c86254fe996d70a3eef1699bb269
|
3 |
+
size 31914053
|
onnx/decoder_with_past_model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:405ea15db995fe2bf5122c98cf6c117fdd75c0e5ebbd892fd52809a94d76e744
|
3 |
+
size 57560019
|
onnx/decoder_with_past_model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97537ef63ab98325eb11a5fed9964afbc9252b84b59bfc9344d485b11ef156be
|
3 |
+
size 57560019
|
onnx/encoder_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a68bf8abf2b005b31d32a829efac70bda5357536e0045c5da62b1faf82d0bf0d
|
3 |
+
size 342759185
|
onnx/encoder_model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca032a5b440875d2b3222a5f26ab10458aac6f68b7fe0e1419c7dcb93bb9ac33
|
3 |
+
size 50807507
|
onnx/encoder_model_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10215f235aac5572d07086e1eb421d0263111763af1909b1885024f26950234b
|
3 |
+
size 171559930
|
onnx/encoder_model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e3bef7d3297b1da9cdf809f7beee23c6ff6a1ac47389f2f425f77ee315eeb3d
|
3 |
+
size 88227663
|
onnx/encoder_model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:943f37c68b24359c89e7f09a1c29e62c5a00f5025dabf706f24ebc6437b61aa5
|
3 |
+
size 56115395
|
onnx/encoder_model_q4f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b8c93002dd58c0ff7fe5cbcc3344138bb139baeee4df3f57d09554c850abd63
|
3 |
+
size 49477133
|
onnx/encoder_model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e3bef7d3297b1da9cdf809f7beee23c6ff6a1ac47389f2f425f77ee315eeb3d
|
3 |
+
size 88227663
|
onnx/encoder_model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a34759f65d5d7ba531cea2642b2be005a26dffc8dd9767f00f3abde6953c0c7d
|
3 |
+
size 88227663
|
quantize_config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"modes": [
|
3 |
+
"fp16",
|
4 |
+
"q8",
|
5 |
+
"int8",
|
6 |
+
"uint8",
|
7 |
+
"q4",
|
8 |
+
"q4f16",
|
9 |
+
"bnb4"
|
10 |
+
],
|
11 |
+
"per_channel": true,
|
12 |
+
"reduce_range": true,
|
13 |
+
"block_size": null,
|
14 |
+
"is_symmetric": true,
|
15 |
+
"accuracy_level": null,
|
16 |
+
"quant_type": 1
|
17 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": "1.0",
|
3 |
+
"truncation": null,
|
4 |
+
"padding": null,
|
5 |
+
"added_tokens": [
|
6 |
+
{
|
7 |
+
"id": 0,
|
8 |
+
"content": "<s>",
|
9 |
+
"single_word": false,
|
10 |
+
"lstrip": false,
|
11 |
+
"rstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"special": true
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"id": 1,
|
17 |
+
"content": "<pad>",
|
18 |
+
"single_word": false,
|
19 |
+
"lstrip": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"special": true
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"id": 2,
|
26 |
+
"content": "</s>",
|
27 |
+
"single_word": false,
|
28 |
+
"lstrip": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"special": true
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 3,
|
35 |
+
"content": "<unk>",
|
36 |
+
"single_word": false,
|
37 |
+
"lstrip": false,
|
38 |
+
"rstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"special": true
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"id": 79,
|
44 |
+
"content": "<mask>",
|
45 |
+
"single_word": false,
|
46 |
+
"lstrip": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"special": true
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"id": 80,
|
53 |
+
"content": "<ctc_blank>",
|
54 |
+
"single_word": false,
|
55 |
+
"lstrip": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"normalized": false,
|
58 |
+
"special": true
|
59 |
+
}
|
60 |
+
],
|
61 |
+
"normalizer": {
|
62 |
+
"type": "Precompiled",
|
63 |
+
"precompiled_charsmap": null
|
64 |
+
},
|
65 |
+
"pre_tokenizer": {
|
66 |
+
"type": "Sequence",
|
67 |
+
"pretokenizers": [
|
68 |
+
{
|
69 |
+
"type": "WhitespaceSplit"
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"type": "Metaspace",
|
73 |
+
"replacement": "\u2581",
|
74 |
+
"add_prefix_space": true
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"type": "Split",
|
78 |
+
"pattern": {
|
79 |
+
"Regex": ""
|
80 |
+
},
|
81 |
+
"behavior": "Isolated",
|
82 |
+
"invert": false
|
83 |
+
}
|
84 |
+
]
|
85 |
+
},
|
86 |
+
"post_processor": {
|
87 |
+
"type": "TemplateProcessing",
|
88 |
+
"single": [
|
89 |
+
{
|
90 |
+
"Sequence": {
|
91 |
+
"id": "A",
|
92 |
+
"type_id": 0
|
93 |
+
}
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"SpecialToken": {
|
97 |
+
"id": "</s>",
|
98 |
+
"type_id": 0
|
99 |
+
}
|
100 |
+
}
|
101 |
+
],
|
102 |
+
"pair": [
|
103 |
+
{
|
104 |
+
"Sequence": {
|
105 |
+
"id": "A",
|
106 |
+
"type_id": 0
|
107 |
+
}
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"SpecialToken": {
|
111 |
+
"id": "</s>",
|
112 |
+
"type_id": 0
|
113 |
+
}
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"Sequence": {
|
117 |
+
"id": "B",
|
118 |
+
"type_id": 0
|
119 |
+
}
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"SpecialToken": {
|
123 |
+
"id": "</s>",
|
124 |
+
"type_id": 0
|
125 |
+
}
|
126 |
+
}
|
127 |
+
],
|
128 |
+
"special_tokens": {
|
129 |
+
"</s>": {
|
130 |
+
"id": "</s>",
|
131 |
+
"ids": [
|
132 |
+
2
|
133 |
+
],
|
134 |
+
"tokens": [
|
135 |
+
"</s>"
|
136 |
+
]
|
137 |
+
}
|
138 |
+
}
|
139 |
+
},
|
140 |
+
"decoder": {
|
141 |
+
"type": "Metaspace",
|
142 |
+
"replacement": "\u2581",
|
143 |
+
"add_prefix_space": true
|
144 |
+
},
|
145 |
+
"model": {
|
146 |
+
"unk_id": 2,
|
147 |
+
"vocab": {
|
148 |
+
"<s>": 0,
|
149 |
+
"<pad>": 1,
|
150 |
+
"</s>": 2,
|
151 |
+
"<unk>": 3,
|
152 |
+
"\u2581": 4,
|
153 |
+
"e": 5,
|
154 |
+
"t": 6,
|
155 |
+
"a": 7,
|
156 |
+
"o": 8,
|
157 |
+
"n": 9,
|
158 |
+
"i": 10,
|
159 |
+
"h": 11,
|
160 |
+
"s": 12,
|
161 |
+
"r": 13,
|
162 |
+
"d": 14,
|
163 |
+
"l": 15,
|
164 |
+
"u": 16,
|
165 |
+
"c": 17,
|
166 |
+
"m": 18,
|
167 |
+
"f": 19,
|
168 |
+
"w": 20,
|
169 |
+
"g": 21,
|
170 |
+
"y": 22,
|
171 |
+
",": 23,
|
172 |
+
"p": 24,
|
173 |
+
"b": 25,
|
174 |
+
".": 26,
|
175 |
+
"v": 27,
|
176 |
+
"k": 28,
|
177 |
+
"\"": 29,
|
178 |
+
"I": 30,
|
179 |
+
"'": 31,
|
180 |
+
"T": 32,
|
181 |
+
"A": 33,
|
182 |
+
"S": 34,
|
183 |
+
"H": 35,
|
184 |
+
";": 36,
|
185 |
+
"x": 37,
|
186 |
+
"W": 38,
|
187 |
+
"-": 39,
|
188 |
+
"B": 40,
|
189 |
+
"?": 41,
|
190 |
+
"C": 42,
|
191 |
+
"M": 43,
|
192 |
+
"!": 44,
|
193 |
+
"q": 45,
|
194 |
+
"j": 46,
|
195 |
+
"E": 47,
|
196 |
+
"N": 48,
|
197 |
+
"P": 49,
|
198 |
+
"O": 50,
|
199 |
+
"D": 51,
|
200 |
+
"L": 52,
|
201 |
+
"G": 53,
|
202 |
+
"R": 54,
|
203 |
+
"F": 55,
|
204 |
+
"Y": 56,
|
205 |
+
"z": 57,
|
206 |
+
"J": 58,
|
207 |
+
":": 59,
|
208 |
+
"K": 60,
|
209 |
+
"U": 61,
|
210 |
+
"V": 62,
|
211 |
+
")": 63,
|
212 |
+
"(": 64,
|
213 |
+
"Q": 65,
|
214 |
+
"Z": 66,
|
215 |
+
"]": 67,
|
216 |
+
"[": 68,
|
217 |
+
"X": 69,
|
218 |
+
"\u2014": 70,
|
219 |
+
"/": 71,
|
220 |
+
"\u00e6": 72,
|
221 |
+
"\u00e9": 73,
|
222 |
+
"{": 74,
|
223 |
+
"}": 75,
|
224 |
+
"\u00ea": 76,
|
225 |
+
"\u0153": 77,
|
226 |
+
"\u0304": 78,
|
227 |
+
"<mask>": 79,
|
228 |
+
"<ctc_blank>": 80
|
229 |
+
}
|
230 |
+
}
|
231 |
+
}
|
tokenizer_config.json
CHANGED
@@ -1,7 +1,58 @@
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"bos_token": "<s>",
|
3 |
"clean_up_tokenization_spaces": true,
|
4 |
"eos_token": "</s>",
|
|
|
5 |
"model_max_length": 600,
|
6 |
"normalize": false,
|
7 |
"pad_token": "<pad>",
|
|
|
1 |
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<s>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<pad>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "</s>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "<unk>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"79": {
|
36 |
+
"content": "<mask>",
|
37 |
+
"lstrip": true,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
},
|
43 |
+
"80": {
|
44 |
+
"content": "<ctc_blank>",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
}
|
51 |
+
},
|
52 |
"bos_token": "<s>",
|
53 |
"clean_up_tokenization_spaces": true,
|
54 |
"eos_token": "</s>",
|
55 |
+
"mask_token": "<mask>",
|
56 |
"model_max_length": 600,
|
57 |
"normalize": false,
|
58 |
"pad_token": "<pad>",
|