deep-div committed on
Commit
fa5d634
·
verified ·
1 Parent(s): 94aab4a

Upload 6 files

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -35
  2. LICENSE +202 -0
  3. README.md +14 -14
  4. app.py +274 -0
  5. model.py +1071 -0
  6. requirements.txt +4 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
README.md CHANGED
@@ -1,14 +1,14 @@
1
- ---
2
- title: Text To Speech TTS
3
- emoji: 🏢
4
- colorFrom: pink
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.38.2
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Convert text to Speech
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: tts Text To Speech
3
+ emoji: 🌍
4
+ colorFrom: yellow
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 4.44.1
8
+ python_version: 3.8.9
9
+ app_file: app.py
10
+ pinned: false
11
+ license: apache-2.0
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
4
+ #
5
+ # See LICENSE for clarification regarding multiple authors
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+
19
+ # References:
20
+ # https://gradio.app/docs/#dropdown
21
+
22
+ import os
23
+ import time
24
+ import uuid
25
+ from datetime import datetime
26
+
27
+ import gradio as gr
28
+ import soundfile as sf
29
+
30
+ from model import get_pretrained_model, language_to_models
31
+
32
+
33
def MyPrint(s):
    """Print ``s`` to stdout, prefixed with the current local timestamp.

    The prefix has the form ``YYYY-mm-dd HH:MM:SS.ffffff`` and is followed
    by ``": "`` and the message.

    Args:
      s: The message to print; it is formatted with an f-string.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    print(f"{stamp}: {s}")
37
+
38
+
39
+ title = "# Next-gen Kaldi: Text-to-speech (TTS)"
40
+
41
+ description = """
42
+ This space shows how to convert text to speech with Next-gen Kaldi.
43
+
44
+ It is running on CPU within a docker container provided by Hugging Face.
45
+
46
+ See more information by visiting the following links:
47
+
48
+ - <https://github.com/k2-fsa/sherpa-onnx>
49
+
50
+ If you want to deploy it locally, please see
51
+ <https://k2-fsa.github.io/sherpa/>
52
+
53
+ If you want to use Android APKs, please see
54
+ <https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>
55
+
56
+ If you want to use Android text-to-speech engine APKs, please see
57
+ <https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>
58
+
59
+ If you want to download an all-in-one exe for Windows, please see
60
+ <https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
61
+
62
+ """
63
+
64
+ # css style is copied from
65
+ # https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
66
+ css = """
67
+ .result {display:flex;flex-direction:column}
68
+ .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
69
+ .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
70
+ .result_item_error {background-color:#ff7070;color:white;align-self:start}
71
+ """
72
+
73
+ examples = [
74
+ [
75
+ "Chinese (Mandarin, 普通话)",
76
+ "csukuangfj/matcha-icefall-zh-baker|1 speaker",
77
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。",
78
+ 0,
79
+ 1.0,
80
+ ],
81
+ [
82
+ "Chinese (Mandarin, 普通话)",
83
+ "csukuangfj/vits-zh-hf-fanchen-wnj|1 speaker",
84
+ "在一个阳光明媚的夏天,小马、小羊和小狗它们一块儿在广阔的草地上,嬉戏玩耍,这时小猴来了,还带着它心爱的足球活蹦乱跳地跑前、跑后教小马、小羊、小狗踢足球。",
85
+ 0,
86
+ 1.0,
87
+ ],
88
+ [
89
+ "Chinese (Mandarin, 普通话)",
90
+ "csukuangfj/vits-zh-hf-fanchen-C|187 speakers",
91
+ '小米的使命是,始终坚持做"感动人心、价格厚道"的好产品,让全球每个人都能享受科技带来的美好生活。',
92
+ 0,
93
+ 1.0,
94
+ ],
95
+ ["Min-nan (闽南话)", "csukuangfj/vits-mms-nan", "ài piaǸ chiah ē iaN̂", 0, 1.0],
96
+ ["Thai", "csukuangfj/vits-mms-tha", "ฉันรักคุณ", 0, 1.0],
97
+ [
98
+ "Chinese (Mandarin, 普通话)",
99
+ "csukuangfj/sherpa-onnx-vits-zh-ll|5 speakers",
100
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。",
101
+ 2,
102
+ 1.0,
103
+ ],
104
+ ]
105
+
106
+
107
def update_model_dropdown(language: str):
    """Return a dropdown listing the models registered for ``language``.

    Args:
      language: A key of ``language_to_models``.

    Returns:
      An interactive ``gr.Dropdown`` whose choices are the models of the
      given language and whose selected value is the first of them.

    Raises:
      ValueError: If ``language`` is not a key of ``language_to_models``.
    """
    # Reject unknown languages up front so the happy path reads straight through.
    if language not in language_to_models:
        raise ValueError(f"Unsupported language: {language}")

    models = language_to_models[language]
    return gr.Dropdown(choices=models, value=models[0], interactive=True)
117
+
118
+
119
def build_html_output(s: str, style: str = "result_item_success"):
    """Wrap ``s`` in the nested ``result`` / ``result_item`` div markup.

    Args:
      s: Content placed inside the inner div. It is inserted verbatim,
        without HTML escaping.
      style: Extra CSS class added to the inner div next to ``result_item``.

    Returns:
      The HTML snippet as a string, one element per line.
    """
    # NOTE(review): the reviewed listing does not preserve indentation inside
    # the template, so each line is emitted without leading whitespace.
    markup_lines = [
        "",
        "<div class='result'>",
        f"<div class='result_item {style}'>",
        f"{s}",
        "</div>",
        "</div>",
        "",
    ]
    return "\n".join(markup_lines)
127
+
128
+
129
def process(language: str, repo_id: str, text: str, sid: str, speed: float):
    """Synthesize ``text`` with the selected model and save it as a wav file.

    Args:
      language: The selected language label. It is not used by this function.
      repo_id: Model identifier, forwarded to ``get_pretrained_model``.
      text: The text to synthesize.
      sid: Speaker ID. It is converted with ``int()``, so both numeric
        strings and integers are accepted.
      speed: Speed factor, forwarded to ``get_pretrained_model``.

    Returns:
      A tuple ``(filename, html)``: the name of the generated wav file and
      an HTML snippet reporting the wave duration, the processing time and
      their ratio (RTF).

    Raises:
      ValueError: If ``text`` is empty or whitespace only, if ``sid`` cannot
        be converted to an integer, or if the model produced no samples.
    """
    MyPrint(f"Input text: {text}. sid: {sid}, speed: {speed}")

    # Reject empty input before a model is fetched and loaded for nothing.
    if not text or not text.strip():
        raise ValueError("Please input some text before submitting.")

    sid = int(sid)
    tts = get_pretrained_model(repo_id, speed)

    # perf_counter() is monotonic; time.time() follows the wall clock and can
    # jump when the system clock is adjusted, which would corrupt the RTF.
    start = time.perf_counter()
    audio = tts.generate(text, sid=sid)
    elapsed_seconds = time.perf_counter() - start

    if len(audio.samples) == 0:
        raise ValueError(
            "Error in generating audios. Please read previous error messages."
        )

    duration = len(audio.samples) / audio.sample_rate
    # Real-time factor: processing time divided by the audio duration.
    rtf = elapsed_seconds / duration

    info = (
        f"\nWave duration : {duration:.3f} s <br/>\n"
        f"Processing time: {elapsed_seconds:.3f} s <br/>\n"
        f"RTF: {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f} <br/>\n"
    )

    MyPrint(info)
    MyPrint(f"\nrepo_id: {repo_id}\ntext: {text}\nsid: {sid}\nspeed: {speed}")

    # A random UUID keeps output files of different requests from colliding.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(
        filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )

    return filename, build_html_output(info)
167
+
168
+
169
+ demo = gr.Blocks(css=css)
170
+
171
+
172
+ with demo:
173
+ gr.Markdown(title)
174
+ language_choices = list(language_to_models.keys())
175
+
176
+ language_radio = gr.Radio(
177
+ label="Language",
178
+ choices=language_choices,
179
+ value=language_choices[0],
180
+ )
181
+
182
+ model_dropdown = gr.Dropdown(
183
+ choices=language_to_models[language_choices[0]],
184
+ label="Select a model",
185
+ value=language_to_models[language_choices[0]][0],
186
+ )
187
+
188
+ language_radio.change(
189
+ update_model_dropdown,
190
+ inputs=language_radio,
191
+ outputs=model_dropdown,
192
+ )
193
+
194
+ with gr.Tabs():
195
+ with gr.TabItem("Please input your text"):
196
+ input_text = gr.Textbox(
197
+ label="Input text",
198
+ info="Your text",
199
+ lines=3,
200
+ placeholder="Please input your text here",
201
+ )
202
+
203
+ input_sid = gr.Textbox(
204
+ label="Speaker ID",
205
+ info="Speaker ID",
206
+ lines=1,
207
+ max_lines=1,
208
+ value="0",
209
+ placeholder="Speaker ID. Valid only for mult-speaker model",
210
+ )
211
+
212
+ input_speed = gr.Slider(
213
+ minimum=0.1,
214
+ maximum=10,
215
+ value=1,
216
+ step=0.1,
217
+ label="Speed (larger->faster; smaller->slower)",
218
+ )
219
+
220
+ input_button = gr.Button("Submit")
221
+
222
+ output_audio = gr.Audio(label="Output")
223
+
224
+ output_info = gr.HTML(label="Info")
225
+
226
+ gr.Examples(
227
+ examples=examples,
228
+ fn=process,
229
+ inputs=[
230
+ language_radio,
231
+ model_dropdown,
232
+ input_text,
233
+ input_sid,
234
+ input_speed,
235
+ ],
236
+ outputs=[
237
+ output_audio,
238
+ output_info,
239
+ ],
240
+ )
241
+
242
+ input_button.click(
243
+ process,
244
+ inputs=[
245
+ language_radio,
246
+ model_dropdown,
247
+ input_text,
248
+ input_sid,
249
+ input_speed,
250
+ ],
251
+ outputs=[
252
+ output_audio,
253
+ output_info,
254
+ ],
255
+ )
256
+
257
+ gr.Markdown(description)
258
+
259
+
260
def download_espeak_ng_data():
    """Download and unpack the espeak-ng data archive into ``/tmp``.

    Runs ``wget`` and then ``tar`` with ``/tmp`` as the working directory.
    The step stays best-effort: a failure is logged through ``MyPrint`` and
    the function returns without raising. Extraction is skipped when the
    download fails, so a stale or truncated archive is never unpacked.
    """
    import subprocess

    url = (
        "https://github.com/k2-fsa/sherpa-onnx/releases/download/"
        "tts-models/espeak-ng-data.tar.bz2"
    )
    archive = "espeak-ng-data.tar.bz2"

    # -O overwrites the archive on repeated runs; without it wget would save
    # "<archive>.1" and tar would keep extracting the old file.
    commands = (
        ["wget", "-qq", "-O", archive, url],
        ["tar", "xf", archive],
    )

    for cmd in commands:
        try:
            # Argument lists avoid going through a shell.
            result = subprocess.run(cmd, cwd="/tmp", check=False)
        except OSError as e:
            MyPrint(f"Failed to run {cmd[0]}: {e}")
            return

        if result.returncode != 0:
            MyPrint(f"{cmd[0]} exited with status {result.returncode}")
            return
268
+
269
+
270
if __name__ == "__main__":
    # Fetch the espeak-ng data before the UI starts serving requests.
    download_espeak_ng_data()

    demo.launch()
model.py ADDED
@@ -0,0 +1,1071 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
2
+ #
3
+ # See LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import os
18
+ from functools import lru_cache
19
+ from pathlib import Path
20
+
21
+ import sherpa_onnx
22
+ from huggingface_hub import hf_hub_download
23
+
24
+
25
def get_file(repo_id: str, filename: str, subfolder: str = ".") -> str:
    """Fetch one file from a Hugging Face Hub repository.

    Args:
      repo_id: Repository identifier passed to ``hf_hub_download``.
      filename: Name of the file inside the repository.
      subfolder: Folder inside the repository that contains the file.

    Returns:
      The value returned by ``hf_hub_download`` for the requested file.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
36
+
37
+
38
@lru_cache(maxsize=10)
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the TTS engine for the ``csukuangfj/vits-vctk`` repository.

    Up to 10 results are cached by ``lru_cache``, keyed on the arguments.

    Args:
      repo_id: Must be exactly ``"csukuangfj/vits-vctk"``.
      speed: Speed factor; the VITS ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the fetched files.
    """
    assert repo_id == "csukuangfj/vits-vctk"

    # Files are fetched in this order: model, lexicon, tokens.
    model_path, lexicon_path, tokens_path = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-vctk.onnx", "lexicon.txt", "tokens.txt")
    )

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(
        sherpa_onnx.OfflineTtsConfig(
            model=model_config,
            max_num_sentences=1,
        )
    )
78
+
79
+
80
@lru_cache(maxsize=10)
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the TTS engine for the ``csukuangfj/vits-ljs`` repository.

    Up to 10 results are cached by ``lru_cache``, keyed on the arguments.

    Args:
      repo_id: Must be exactly ``"csukuangfj/vits-ljs"``.
      speed: Speed factor; the VITS ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the fetched files.
    """
    assert repo_id == "csukuangfj/vits-ljs"

    # Files are fetched in this order: model, lexicon, tokens.
    model_path, lexicon_path, tokens_path = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-ljs.onnx", "lexicon.txt", "tokens.txt")
    )

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(
        sherpa_onnx.OfflineTtsConfig(
            model=model_config,
            max_num_sentences=1,
        )
    )
120
+
121
+
122
@lru_cache(maxsize=10)
def _get_kokoro(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the TTS engine for one of the supported Kokoro repositories.

    Up to 10 results are cached by ``lru_cache``, keyed on the arguments.

    Args:
      repo_id: Repository identifier. Anything after the first ``"|"`` is
        dropped before the identifier is checked and used.
      speed: Speed factor; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the fetched files.
    """
    multi_lang_repos = (
        "csukuangfj/kokoro-multi-lang-v1_0",
        "csukuangfj/kokoro-multi-lang-v1_1",
    )

    repo_id = repo_id.split("|")[0]
    assert repo_id in ("csukuangfj/kokoro-en-v0_19",) + multi_lang_repos, repo_id

    def fetch(name):
        # Every file sits at the top level of the repository.
        return get_file(repo_id=repo_id, filename=name, subfolder=".")

    model_path = fetch("model.onnx")
    tokens_path = fetch("tokens.txt")
    voices_path = fetch("voices.bin")

    # The English-only model uses no lexicon, rule FSTs or dict directory.
    lexicon = ""
    rule_fsts = ""
    dict_dir = ""

    if repo_id in multi_lang_repos:
        english_lexicon = fetch("lexicon-us-en.txt")
        chinese_lexicon = fetch("lexicon-zh.txt")
        lexicon = ",".join([english_lexicon, chinese_lexicon])

        date_fst = fetch("date-zh.fst")
        number_fst = fetch("number-zh.fst")
        phone_fst = fetch("phone-zh.fst")
        # The rules are listed as date, phone, number, which differs from
        # the order in which the files are fetched.
        rule_fsts = ",".join([date_fst, phone_fst, number_fst])
        dict_dir = "/tmp/dict"

    kokoro_config = sherpa_onnx.OfflineTtsKokoroModelConfig(
        model=model_path,
        voices=voices_path,
        tokens=tokens_path,
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
        lexicon=lexicon,
        dict_dir=dict_dir,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        kokoro=kokoro_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(
        sherpa_onnx.OfflineTtsConfig(
            model=model_config,
            max_num_sentences=1,
            rule_fsts=rule_fsts,
        )
    )
210
+
211
+
212
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a VITS TTS engine for a piper, mimic3, coqui or MMS repository.

    Up to 10 results are cached by ``lru_cache``, keyed on the arguments.

    Args:
      repo_id: Repository identifier. Anything after the first ``"|"`` is
        dropped before the identifier is used.
      speed: Speed factor; the VITS ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the fetched files.

    Raises:
      ValueError: If the identifier contains none of ``coqui``, ``vits-mms``,
        ``piper`` or ``mimic3``.
    """
    repo_id = repo_id.split("|")[0]

    # Work out the stem of the ``.onnx`` file stored in the repository.
    if "coqui" in repo_id or "vits-mms" in repo_id:
        stem = "model"
    elif "piper" in repo_id:
        # Drop the leading "vits-piper-" from the part after the slash.
        stem = repo_id.split("/")[1][len("vits-piper-"):]
    elif "mimic3" in repo_id:
        # Drop the leading "vits-mimic3-" from the part after the slash.
        stem = repo_id.split("/")[1][len("vits-mimic3-"):]
    else:
        raise ValueError(f"Unsupported {repo_id}")

    # These repositories are configured with an empty data_dir.
    without_data_dir = "vits-coqui-uk-mai" in repo_id or "vits-mms" in repo_id
    espeak_dir = "" if without_data_dir else "/tmp/espeak-ng-data"

    model_path = get_file(repo_id=repo_id, filename=f"{stem}.onnx", subfolder=".")
    tokens_path = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon="",
        data_dir=espeak_dir,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(
        sherpa_onnx.OfflineTtsConfig(
            model=model_config,
            max_num_sentences=1,
        )
    )
262
+
263
+
264
+ @lru_cache(maxsize=10)
265
+ def _get_vits_mms(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
266
+ return _get_vits_piper(repo_id, speed)
267
+
268
+
269
+ @lru_cache(maxsize=10)
270
+ def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
271
+ repo_id = repo_id.split("|")[0]
272
+ assert repo_id == "csukuangfj/vits-zh-aishell3", repo_id
273
+
274
+ model = get_file(
275
+ repo_id=repo_id,
276
+ filename="vits-aishell3.onnx",
277
+ subfolder=".",
278
+ )
279
+
280
+ lexicon = get_file(
281
+ repo_id=repo_id,
282
+ filename="lexicon.txt",
283
+ subfolder=".",
284
+ )
285
+
286
+ tokens = get_file(
287
+ repo_id=repo_id,
288
+ filename="tokens.txt",
289
+ subfolder=".",
290
+ )
291
+
292
+ rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
293
+
294
+ rule_fsts = [
295
+ get_file(
296
+ repo_id=repo_id,
297
+ filename=f,
298
+ subfolder=".",
299
+ )
300
+ for f in rule_fsts
301
+ ]
302
+ rule_fsts = ",".join(rule_fsts)
303
+
304
+ rule_fars = get_file(
305
+ repo_id=repo_id,
306
+ filename="rule.far",
307
+ subfolder=".",
308
+ )
309
+
310
+ tts_config = sherpa_onnx.OfflineTtsConfig(
311
+ model=sherpa_onnx.OfflineTtsModelConfig(
312
+ vits=sherpa_onnx.OfflineTtsVitsModelConfig(
313
+ model=model,
314
+ lexicon=lexicon,
315
+ tokens=tokens,
316
+ length_scale=1.0 / speed,
317
+ ),
318
+ matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
319
+ provider="cpu",
320
+ debug=True,
321
+ num_threads=2,
322
+ ),
323
+ rule_fsts=rule_fsts,
324
+ rule_fars=rule_fars,
325
+ max_num_sentences=1,
326
+ )
327
+ tts = sherpa_onnx.OfflineTts(tts_config)
328
+
329
+ return tts
330
+
331
+
332
+ @lru_cache(maxsize=10)
333
+ def _get_matcha_hf_espeak(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
334
+ repo_id = repo_id.split("|")[0]
335
+ assert repo_id in (
336
+ "csukuangfj/matcha-tts-fa_en-khadijah",
337
+ "csukuangfj/matcha-tts-fa_en-musa",
338
+ ), repo_id
339
+
340
+ acoustic_model = get_file(
341
+ repo_id=repo_id,
342
+ filename="model.onnx",
343
+ subfolder=".",
344
+ )
345
+
346
+ vocoder = get_file(
347
+ repo_id="csukuangfj/sherpa-onnx-hifigan",
348
+ filename="hifigan_v2.onnx",
349
+ subfolder=".",
350
+ )
351
+
352
+ tokens = get_file(
353
+ repo_id=repo_id,
354
+ filename="tokens.txt",
355
+ subfolder=".",
356
+ )
357
+
358
+ data_dir = "/tmp/espeak-ng-data"
359
+ tts_config = sherpa_onnx.OfflineTtsConfig(
360
+ model=sherpa_onnx.OfflineTtsModelConfig(
361
+ vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
362
+ matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
363
+ acoustic_model=acoustic_model,
364
+ vocoder=vocoder,
365
+ tokens=tokens,
366
+ lexicon="",
367
+ data_dir=data_dir,
368
+ length_scale=1.0 / speed,
369
+ ),
370
+ provider="cpu",
371
+ debug=True,
372
+ num_threads=2,
373
+ ),
374
+ max_num_sentences=1,
375
+ )
376
+ tts = sherpa_onnx.OfflineTts(tts_config)
377
+
378
+ return tts
379
+
380
+
381
+ @lru_cache(maxsize=10)
382
+ def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
383
+ repo_id = repo_id.split("|")[0]
384
+ assert repo_id in ("csukuangfj/matcha-icefall-zh-baker",), repo_id
385
+
386
+ if repo_id == "csukuangfj/matcha-icefall-zh-baker":
387
+ acoustic_model = "model-steps-3.onnx"
388
+
389
+ if not Path("/tmp/dict").is_dir():
390
+ os.system(
391
+ "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
392
+ )
393
+ os.system("ls -lh /tmp/dict")
394
+
395
+ acoustic_model = get_file(
396
+ repo_id=repo_id,
397
+ filename=acoustic_model,
398
+ subfolder=".",
399
+ )
400
+
401
+ vocoder = get_file(
402
+ repo_id="csukuangfj/sherpa-onnx-hifigan",
403
+ filename="hifigan_v2.onnx",
404
+ subfolder=".",
405
+ )
406
+
407
+ lexicon = get_file(
408
+ repo_id=repo_id,
409
+ filename="lexicon.txt",
410
+ subfolder=".",
411
+ )
412
+
413
+ tokens = get_file(
414
+ repo_id=repo_id,
415
+ filename="tokens.txt",
416
+ subfolder=".",
417
+ )
418
+
419
+ rule_fars = ""
420
+
421
+ rule_fsts = ["phone.fst", "date.fst", "number.fst"]
422
+
423
+ rule_fsts = [
424
+ get_file(
425
+ repo_id=repo_id,
426
+ filename=f,
427
+ subfolder=".",
428
+ )
429
+ for f in rule_fsts
430
+ ]
431
+ rule_fsts = ",".join(rule_fsts)
432
+
433
+ dict_dir = "/tmp/dict"
434
+
435
+ tts_config = sherpa_onnx.OfflineTtsConfig(
436
+ model=sherpa_onnx.OfflineTtsModelConfig(
437
+ vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
438
+ matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
439
+ acoustic_model=acoustic_model,
440
+ vocoder=vocoder,
441
+ lexicon=lexicon,
442
+ tokens=tokens,
443
+ dict_dir=dict_dir,
444
+ length_scale=1.0 / speed,
445
+ ),
446
+ provider="cpu",
447
+ debug=True,
448
+ num_threads=2,
449
+ ),
450
+ rule_fsts=rule_fsts,
451
+ rule_fars=rule_fars,
452
+ max_num_sentences=1,
453
+ )
454
+ tts = sherpa_onnx.OfflineTts(tts_config)
455
+
456
+ return tts
457
+
458
+
459
+ @lru_cache(maxsize=10)
460
+ def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
461
+ repo_id = repo_id.split("|")[0]
462
+
463
+ if "fanchen" in repo_id or "vits-cantonese-hf-xiaomaiiwn" in repo_id:
464
+ model = repo_id.split("/")[-1]
465
+ elif "csukuangfj/vits-melo-tts-zh_en" == repo_id:
466
+ model = "model"
467
+ else:
468
+ model = repo_id.split("-")[-1]
469
+
470
+ if "sherpa-onnx-vits-zh-ll" in repo_id:
471
+ model = "model"
472
+
473
+ if not Path("/tmp/dict").is_dir():
474
+ os.system(
475
+ "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
476
+ )
477
+ os.system("ls -lh /tmp/dict")
478
+
479
+ model = get_file(
480
+ repo_id=repo_id,
481
+ filename=f"{model}.onnx",
482
+ subfolder=".",
483
+ )
484
+
485
+ lexicon = get_file(
486
+ repo_id=repo_id,
487
+ filename="lexicon.txt",
488
+ subfolder=".",
489
+ )
490
+
491
+ tokens = get_file(
492
+ repo_id=repo_id,
493
+ filename="tokens.txt",
494
+ subfolder=".",
495
+ )
496
+
497
+ rule_fars = ""
498
+
499
+ if "vits-cantonese-hf-xiaomaiiwn" not in repo_id:
500
+ rule_fsts = ["phone.fst", "date.fst", "number.fst"]
501
+
502
+ rule_fsts = [
503
+ get_file(
504
+ repo_id=repo_id,
505
+ filename=f,
506
+ subfolder=".",
507
+ )
508
+ for f in rule_fsts
509
+ ]
510
+ rule_fsts = ",".join(rule_fsts)
511
+
512
+ # rule_fars = get_file(
513
+ # repo_id=repo_id,
514
+ # filename="rule.far",
515
+ # subfolder=".",
516
+ # )
517
+ vits_dict_dir = "/tmp/dict"
518
+ else:
519
+ rule_fsts = get_file(
520
+ repo_id=repo_id,
521
+ filename="rule.fst",
522
+ subfolder=".",
523
+ )
524
+ vits_dict_dir = ""
525
+
526
+ tts_config = sherpa_onnx.OfflineTtsConfig(
527
+ model=sherpa_onnx.OfflineTtsModelConfig(
528
+ vits=sherpa_onnx.OfflineTtsVitsModelConfig(
529
+ model=model,
530
+ lexicon=lexicon,
531
+ tokens=tokens,
532
+ dict_dir=vits_dict_dir,
533
+ length_scale=1.0 / speed,
534
+ ),
535
+ matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
536
+ provider="cpu",
537
+ debug=True,
538
+ num_threads=2,
539
+ ),
540
+ rule_fsts=rule_fsts,
541
+ rule_fars=rule_fars,
542
+ max_num_sentences=1,
543
+ )
544
+ tts = sherpa_onnx.OfflineTts(tts_config)
545
+
546
+ return tts
547
+
548
+
549
+ @lru_cache(maxsize=10)
550
+ def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
551
+ if repo_id in chinese_models:
552
+ return chinese_models[repo_id](repo_id, speed)
553
+ elif repo_id in chinese_english_models:
554
+ return chinese_english_models[repo_id](repo_id, speed)
555
+ elif repo_id in persian_english_models:
556
+ return persian_english_models[repo_id](repo_id, speed)
557
+ if repo_id in cantonese_models:
558
+ return cantonese_models[repo_id](repo_id, speed)
559
+ elif repo_id in english_models:
560
+ return english_models[repo_id](repo_id, speed)
561
+ elif repo_id in german_models:
562
+ return german_models[repo_id](repo_id, speed)
563
+ elif repo_id in spanish_models:
564
+ return spanish_models[repo_id](repo_id, speed)
565
+ elif repo_id in french_models:
566
+ return french_models[repo_id](repo_id, speed)
567
+ elif repo_id in ukrainian_models:
568
+ return ukrainian_models[repo_id](repo_id, speed)
569
+ elif repo_id in russian_models:
570
+ return russian_models[repo_id](repo_id, speed)
571
+ elif repo_id in arabic_models:
572
+ return arabic_models[repo_id](repo_id, speed)
573
+ elif repo_id in catalan_models:
574
+ return catalan_models[repo_id](repo_id, speed)
575
+ elif repo_id in czech_models:
576
+ return czech_models[repo_id](repo_id, speed)
577
+ elif repo_id in danish_models:
578
+ return danish_models[repo_id](repo_id, speed)
579
+ elif repo_id in greek_models:
580
+ return greek_models[repo_id](repo_id, speed)
581
+ elif repo_id in finnish_models:
582
+ return finnish_models[repo_id](repo_id, speed)
583
+ elif repo_id in hungarian_models:
584
+ return hungarian_models[repo_id](repo_id, speed)
585
+ elif repo_id in icelandic_models:
586
+ return icelandic_models[repo_id](repo_id, speed)
587
+ elif repo_id in italian_models:
588
+ return italian_models[repo_id](repo_id, speed)
589
+ elif repo_id in georgian_models:
590
+ return georgian_models[repo_id](repo_id, speed)
591
+ elif repo_id in kazakh_models:
592
+ return kazakh_models[repo_id](repo_id, speed)
593
+ elif repo_id in luxembourgish_models:
594
+ return luxembourgish_models[repo_id](repo_id, speed)
595
+ elif repo_id in nepali_models:
596
+ return nepali_models[repo_id](repo_id, speed)
597
+ elif repo_id in dutch_models:
598
+ return dutch_models[repo_id](repo_id, speed)
599
+ elif repo_id in norwegian_models:
600
+ return norwegian_models[repo_id](repo_id, speed)
601
+ elif repo_id in polish_models:
602
+ return polish_models[repo_id](repo_id, speed)
603
+ elif repo_id in portuguese_models:
604
+ return portuguese_models[repo_id](repo_id, speed)
605
+ elif repo_id in romanian_models:
606
+ return romanian_models[repo_id](repo_id, speed)
607
+ elif repo_id in slovak_models:
608
+ return slovak_models[repo_id](repo_id, speed)
609
+ elif repo_id in serbian_models:
610
+ return serbian_models[repo_id](repo_id, speed)
611
+ elif repo_id in swedish_models:
612
+ return swedish_models[repo_id](repo_id, speed)
613
+ elif repo_id in swahili_models:
614
+ return swahili_models[repo_id](repo_id, speed)
615
+ elif repo_id in turkish_models:
616
+ return turkish_models[repo_id](repo_id, speed)
617
+ elif repo_id in vietnamese_models:
618
+ return vietnamese_models[repo_id](repo_id, speed)
619
+ elif repo_id in bulgarian_models:
620
+ return bulgarian_models[repo_id](repo_id, speed)
621
+ elif repo_id in estonian_models:
622
+ return estonian_models[repo_id](repo_id, speed)
623
+ elif repo_id in irish_models:
624
+ return irish_models[repo_id](repo_id, speed)
625
+ elif repo_id in croatian_models:
626
+ return croatian_models[repo_id](repo_id, speed)
627
+ elif repo_id in lithuanian_models:
628
+ return lithuanian_models[repo_id](repo_id, speed)
629
+ elif repo_id in latvian_models:
630
+ return latvian_models[repo_id](repo_id, speed)
631
+ elif repo_id in maltese_models:
632
+ return maltese_models[repo_id](repo_id, speed)
633
+ elif repo_id in slovenian_models:
634
+ return slovenian_models[repo_id](repo_id, speed)
635
+ elif repo_id in bengali_models:
636
+ return bengali_models[repo_id](repo_id, speed)
637
+ elif repo_id in min_nan_models:
638
+ return min_nan_models[repo_id](repo_id, speed)
639
+ elif repo_id in thai_models:
640
+ return thai_models[repo_id](repo_id, speed)
641
+ elif repo_id in persian_models:
642
+ return persian_models[repo_id](repo_id, speed)
643
+ elif repo_id in korean_models:
644
+ return korean_models[repo_id](repo_id, speed)
645
+ elif repo_id in afrikaans_models:
646
+ return afrikaans_models[repo_id](repo_id, speed)
647
+ elif repo_id in gujarati_models:
648
+ return gujarati_models[repo_id](repo_id, speed)
649
+ elif repo_id in tswana_models:
650
+ return tswana_models[repo_id](repo_id, speed)
651
+ elif repo_id in welsh_models:
652
+ return welsh_models[repo_id](repo_id, speed)
653
+ else:
654
+ raise ValueError(f"Unsupported repo_id: {repo_id}")
655
+
656
+
657
+ cantonese_models = {
658
+ "csukuangfj/vits-cantonese-hf-xiaomaiiwn": _get_vits_hf,
659
+ }
660
+
661
+ chinese_english_models = {
662
+ "csukuangfj/kokoro-multi-lang-v1_1|103 speakers": _get_kokoro,
663
+ "csukuangfj/kokoro-multi-lang-v1_0|53 speakers": _get_kokoro,
664
+ "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf, # 1
665
+ }
666
+
667
+ persian_english_models = {
668
+ "csukuangfj/matcha-tts-fa_en-khadijah|1 speaker": _get_matcha_hf_espeak, # 1
669
+ "csukuangfj/matcha-tts-fa_en-musa|1 speaker": _get_matcha_hf_espeak, # 1
670
+ "csukuangfj/vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium|1": _get_vits_piper, # 1
671
+ }
672
+
673
+ chinese_models = {
674
+ "csukuangfj/matcha-icefall-zh-baker|1 speaker": _get_matcha_hf, # 1
675
+ "csukuangfj/vits-zh-hf-fanchen-wnj|1 speaker": _get_vits_hf, # 1
676
+ "csukuangfj/vits-zh-hf-fanchen-C|187 speakers": _get_vits_hf, # 187
677
+ "csukuangfj/sherpa-onnx-vits-zh-ll|5 speakers": _get_vits_hf, # 804
678
+ "csukuangfj/vits-zh-hf-keqing|804 speakers": _get_vits_hf, # 804
679
+ "csukuangfj/vits-zh-hf-theresa|804 speakers": _get_vits_hf, # 804
680
+ "csukuangfj/vits-zh-hf-eula|804 speakers": _get_vits_hf, # 804
681
+ "csukuangfj/vits-zh-hf-echo|804 speakers": _get_vits_hf, # 804
682
+ "csukuangfj/vits-zh-hf-bronya|804 speakers": _get_vits_hf, # 804
683
+ "csukuangfj/vits-zh-hf-doom|804 speakers": _get_vits_hf, # 804
684
+ "csukuangfj/vits-zh-hf-zenyatta|804 speakers": _get_vits_hf, # 804
685
+ "csukuangfj/vits-zh-hf-abyssinvoker|804 speakers": _get_vits_hf, # 804
686
+ "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe|1 speaker": _get_vits_hf, # 1
687
+ "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe_new|1 speaker": _get_vits_hf, # 1
688
+ "csukuangfj/vits-zh-hf-fanchen-unity|1 speaker": _get_vits_hf, # 1
689
+ "csukuangfj/vits-zh-aishell3|174 speakers": _get_vits_zh_aishell3,
690
+ "csukuangfj/vits-piper-zh_CN-huayan-medium|1 speaker": _get_vits_piper,
691
+ # "csukuangfj/vits-piper-zh_CN-huayan-x_low": _get_vits_piper,
692
+ }
693
+
694
+ english_models = {
695
+ "csukuangfj/kokoro-en-v0_19|11 speakers": _get_kokoro,
696
+ "csukuangfj/vits-piper-en_US-glados-high|1 speaker": _get_vits_piper,
697
+ "csukuangfj/vits-piper-en_US-glados|1 speaker": _get_vits_piper,
698
+ "csukuangfj/vits-piper-en_GB-southern_english_male-medium|8 speakers": _get_vits_piper,
699
+ "csukuangfj/vits-piper-en_GB-southern_english_female-medium|6 speakers": _get_vits_piper,
700
+ "csukuangfj/vits-piper-en_US-bryce-medium|1 speaker": _get_vits_piper,
701
+ "csukuangfj/vits-piper-en_US-john-medium|1 speaker": _get_vits_piper,
702
+ "csukuangfj/vits-piper-en_US-norman-medium|1 speaker": _get_vits_piper,
703
+ # coqui-ai
704
+ "csukuangfj/vits-coqui-en-ljspeech|1 speaker": _get_vits_piper,
705
+ "csukuangfj/vits-coqui-en-ljspeech-neon|1 speaker": _get_vits_piper,
706
+ "csukuangfj/vits-coqui-en-vctk|109 speakers": _get_vits_piper,
707
+ # piper, US
708
+ "csukuangfj/vits-piper-en_GB-sweetbbak-amy|1 speaker": _get_vits_piper,
709
+ "csukuangfj/vits-piper-en_US-amy-low|1 speaker": _get_vits_piper,
710
+ "csukuangfj/vits-piper-en_US-amy-medium|1 speaker": _get_vits_piper,
711
+ "csukuangfj/vits-piper-en_US-arctic-medium|18 speakers": _get_vits_piper, # 18 speakers
712
+ "csukuangfj/vits-piper-en_US-danny-low|1 speaker": _get_vits_piper,
713
+ "csukuangfj/vits-piper-en_US-hfc_male-medium|1 speaker": _get_vits_piper,
714
+ "csukuangfj/vits-piper-en_US-hfc_female-medium|1 speaker": _get_vits_piper,
715
+ "csukuangfj/vits-piper-en_US-joe-medium|1 speaker": _get_vits_piper,
716
+ "csukuangfj/vits-piper-en_US-kathleen-low|1 speaker": _get_vits_piper,
717
+ "csukuangfj/vits-piper-en_US-kusal-medium|1 speaker": _get_vits_piper,
718
+ "csukuangfj/vits-piper-en_US-l2arctic-medium|24 speakers": _get_vits_piper, # 24 speakers
719
+ "csukuangfj/vits-piper-en_US-lessac-high|1 speaker": _get_vits_piper,
720
+ "csukuangfj/vits-piper-en_US-lessac-low|1 speaker": _get_vits_piper,
721
+ "csukuangfj/vits-piper-en_US-lessac-medium|1 speaker": _get_vits_piper,
722
+ "csukuangfj/vits-piper-en_US-libritts-high|904 speakers": _get_vits_piper, # 904 speakers
723
+ "csukuangfj/vits-piper-en_US-libritts_r-medium|904 speakers": _get_vits_piper, # 904 speakers
724
+ "csukuangfj/vits-piper-en_US-ljspeech-high|1 speaker": _get_vits_piper,
725
+ "csukuangfj/vits-piper-en_US-ljspeech-medium|1 speaker": _get_vits_piper,
726
+ "csukuangfj/vits-piper-en_US-ryan-high|1 speaker": _get_vits_piper,
727
+ "csukuangfj/vits-piper-en_US-ryan-low|1 speaker": _get_vits_piper,
728
+ "csukuangfj/vits-piper-en_US-ryan-medium|1 speaker": _get_vits_piper,
729
+ # piper, GB
730
+ "csukuangfj/vits-piper-en_GB-alan-low|1 speaker": _get_vits_piper,
731
+ "csukuangfj/vits-piper-en_GB-alan-medium|1 speaker": _get_vits_piper,
732
+ "csukuangfj/vits-piper-en_GB-alan-medium": _get_vits_piper,
733
+ "csukuangfj/vits-piper-en_GB-cori-high|1 speaker": _get_vits_piper,
734
+ "csukuangfj/vits-piper-en_GB-cori-medium|1 speaker": _get_vits_piper,
735
+ "csukuangfj/vits-piper-en_GB-jenny_dioco-medium|1 speaker": _get_vits_piper,
736
+ "csukuangfj/vits-piper-en_GB-northern_english_male-medium|1 speaker": _get_vits_piper,
737
+ "csukuangfj/vits-piper-en_GB-semaine-medium|4 speakers": _get_vits_piper,
738
+ "csukuangfj/vits-piper-en_GB-southern_english_female-low|1 speaker": _get_vits_piper,
739
+ "csukuangfj/vits-piper-en_GB-vctk-medium|109 speakers": _get_vits_piper,
740
+ #
741
+ "csukuangfj/vits-vctk|109 speakers": _get_vits_vctk, # 109 speakers
742
+ "csukuangfj/vits-ljs|1 speaker": _get_vits_ljs,
743
+ }
744
+
745
+ german_models = {
746
+ "csukuangfj/vits-piper-de_DE-glados-low|1 speaker": _get_vits_piper,
747
+ "csukuangfj/vits-piper-de_DE-glados-medium|1 speaker": _get_vits_piper,
748
+ "csukuangfj/vits-piper-de_DE-glados-high|1 speaker": _get_vits_piper,
749
+ "csukuangfj/vits-coqui-de-css10|1 speaker": _get_vits_piper,
750
+ "csukuangfj/vits-piper-de_DE-eva_k-x_low|1 speaker": _get_vits_piper,
751
+ "csukuangfj/vits-piper-de_DE-karlsson-low|1 speaker": _get_vits_piper,
752
+ "csukuangfj/vits-piper-de_DE-kerstin-low|1 speaker": _get_vits_piper,
753
+ # "csukuangfj/vits-piper-de_DE-mls-medium": _get_vits_piper,
754
+ "csukuangfj/vits-piper-de_DE-pavoque-low|1 speaker": _get_vits_piper,
755
+ "csukuangfj/vits-piper-de_DE-ramona-low|1 speaker": _get_vits_piper,
756
+ "csukuangfj/vits-piper-de_DE-thorsten-low|1 speaker": _get_vits_piper,
757
+ "csukuangfj/vits-piper-de_DE-thorsten-medium|1 speaker": _get_vits_piper,
758
+ "csukuangfj/vits-piper-de_DE-thorsten-high|1 speaker": _get_vits_piper,
759
+ "csukuangfj/vits-piper-de_DE-thorsten_emotional-medium|8 speakers": _get_vits_piper, # 8 speakers
760
+ }
761
+
762
+ spanish_models = {
763
+ # "csukuangfj/vits-coqui-es-css10": _get_vits_piper,
764
+ "csukuangfj/vits-piper-es-glados-medium": _get_vits_piper,
765
+ "csukuangfj/vits-piper-es_ES-carlfm-x_low": _get_vits_piper,
766
+ "csukuangfj/vits-piper-es_ES-davefx-medium": _get_vits_piper,
767
+ # "csukuangfj/vits-piper-es_ES-mls_10246-low": _get_vits_piper,
768
+ # "csukuangfj/vits-piper-es_ES-mls_9972-low": _get_vits_piper,
769
+ "csukuangfj/vits-piper-es_ES-sharvard-medium": _get_vits_piper, # 2 speakers
770
+ "csukuangfj/vits-piper-es_MX-ald-medium": _get_vits_piper,
771
+ "csukuangfj/vits-piper-es_MX-claude-high": _get_vits_piper,
772
+ "csukuangfj/vits-mimic3-es_ES-m-ailabs_low": _get_vits_piper,
773
+ }
774
+
775
+ french_models = {
776
+ "csukuangfj/vits-coqui-fr-css10": _get_vits_piper,
777
+ # "csukuangfj/vits-piper-fr_FR-gilles-low": _get_vits_piper,
778
+ # "csukuangfj/vits-piper-fr_FR-mls_1840-low": _get_vits_piper,
779
+ # "csukuangfj/vits-piper-fr_FR-mls-medium": _get_vits_piper, # 2 speakers, 0-femal, 1-male
780
+ "csukuangfj/vits-piper-fr_FR-upmc-medium": _get_vits_piper, # 2 speakers, 0-femal, 1-male
781
+ "csukuangfj/vits-piper-fr_FR-tom-medium|1 speaker": _get_vits_piper, # 2 speakers, 0-femal, 1-male
782
+ "csukuangfj/vits-piper-fr_FR-siwis-low": _get_vits_piper, # female
783
+ "csukuangfj/vits-piper-fr_FR-siwis-medium": _get_vits_piper,
784
+ "csukuangfj/vits-piper-fr_FR-tjiho-model1": _get_vits_piper,
785
+ "csukuangfj/vits-piper-fr_FR-tjiho-model2": _get_vits_piper,
786
+ "csukuangfj/vits-piper-fr_FR-tjiho-model3": _get_vits_piper,
787
+ }
788
+
789
+ ukrainian_models = {
790
+ "csukuangfj/vits-piper-uk_UA-lada-x_low": _get_vits_piper,
791
+ "csukuangfj/vits-coqui-uk-mai": _get_vits_piper,
792
+ # "csukuangfj/vits-piper-uk_UA-ukrainian_tts-medium": _get_vits_piper, # does not work somehow
793
+ }
794
+
795
+ russian_models = {
796
+ "csukuangfj/vits-piper-ru_RU-denis-medium": _get_vits_piper,
797
+ "csukuangfj/vits-piper-ru_RU-dmitri-medium": _get_vits_piper,
798
+ "csukuangfj/vits-piper-ru_RU-irina-medium": _get_vits_piper,
799
+ "csukuangfj/vits-piper-ru_RU-ruslan-medium": _get_vits_piper,
800
+ }
801
+
802
+ arabic_models = {
803
+ "csukuangfj/vits-piper-ar_JO-kareem-low": _get_vits_piper,
804
+ "csukuangfj/vits-piper-ar_JO-kareem-medium": _get_vits_piper,
805
+ }
806
+
807
+ catalan_models = {
808
+ "csukuangfj/vits-piper-ca_ES-upc_ona-x_low": _get_vits_piper,
809
+ "csukuangfj/vits-piper-ca_ES-upc_ona-medium": _get_vits_piper,
810
+ "csukuangfj/vits-piper-ca_ES-upc_pau-x_low": _get_vits_piper,
811
+ }
812
+
813
+ czech_models = {
814
+ "csukuangfj/vits-piper-cs_CZ-jirka-low": _get_vits_piper,
815
+ "csukuangfj/vits-piper-cs_CZ-jirka-medium": _get_vits_piper,
816
+ "csukuangfj/vits-coqui-cs-cv": _get_vits_piper,
817
+ }
818
+
819
+ danish_models = {
820
+ "csukuangfj/vits-coqui-da-cv": _get_vits_piper,
821
+ "csukuangfj/vits-piper-da_DK-talesyntese-medium": _get_vits_piper,
822
+ }
823
+
824
+ greek_models = {
825
+ "csukuangfj/vits-piper-el_GR-rapunzelina-low": _get_vits_piper,
826
+ # "csukuangfj/vits-mimic3-el_GR-rapunzelina_low": _get_vits_piper,
827
+ }
828
+
829
+ finnish_models = {
830
+ "csukuangfj/vits-coqui-fi-css10": _get_vits_piper,
831
+ "csukuangfj/vits-piper-fi_FI-harri-low": _get_vits_piper,
832
+ "csukuangfj/vits-piper-fi_FI-harri-medium": _get_vits_piper,
833
+ "csukuangfj/vits-mimic3-fi_FI-harri-tapani-ylilammi_low": _get_vits_piper,
834
+ }
835
+
836
+ hungarian_models = {
837
+ # "csukuangfj/vits-coqui-hu-css10": _get_vits_piper,
838
+ "csukuangfj/vits-piper-hu_HU-anna-medium": _get_vits_piper,
839
+ "csukuangfj/vits-piper-hu_HU-berta-medium": _get_vits_piper,
840
+ "csukuangfj/vits-piper-hu_HU-imre-medium": _get_vits_piper,
841
+ "csukuangfj/vits-mimic3-hu_HU-diana-majlinger_low": _get_vits_piper,
842
+ }
843
+
844
+ icelandic_models = {
845
+ "csukuangfj/vits-piper-is_IS-bui-medium": _get_vits_piper,
846
+ "csukuangfj/vits-piper-is_IS-salka-medium": _get_vits_piper,
847
+ "csukuangfj/vits-piper-is_IS-steinn-medium": _get_vits_piper,
848
+ "csukuangfj/vits-piper-is_IS-ugla-medium": _get_vits_piper,
849
+ }
850
+
851
+ italian_models = {
852
+ "csukuangfj/vits-piper-it_IT-riccardo-x_low": _get_vits_piper,
853
+ "csukuangfj/vits-piper-it_IT-paola-medium": _get_vits_piper,
854
+ }
855
+
856
+ georgian_models = {
857
+ "csukuangfj/vits-piper-ka_GE-natia-medium": _get_vits_piper,
858
+ }
859
+
860
+ kazakh_models = {
861
+ "csukuangfj/vits-piper-kk_KZ-iseke-x_low": _get_vits_piper,
862
+ "csukuangfj/vits-piper-kk_KZ-issai-high": _get_vits_piper,
863
+ "csukuangfj/vits-piper-kk_KZ-raya-x_low": _get_vits_piper,
864
+ }
865
+
866
+ luxembourgish_models = {
867
+ "csukuangfj/vits-piper-lb_LU-marylux-medium": _get_vits_piper,
868
+ }
869
+
870
+ nepali_models = {
871
+ "csukuangfj/vits-piper-ne_NP-google-medium": _get_vits_piper,
872
+ "csukuangfj/vits-piper-ne_NP-google-x_low": _get_vits_piper,
873
+ "csukuangfj/vits-mimic3-ne_NP-ne-google_low": _get_vits_piper,
874
+ }
875
+
876
+ dutch_models = {
877
+ "csukuangfj/vits-coqui-nl-css10": _get_vits_piper,
878
+ "csukuangfj/vits-piper-nl_BE-nathalie-medium": _get_vits_piper,
879
+ "csukuangfj/vits-piper-nl_BE-nathalie-x_low": _get_vits_piper,
880
+ "csukuangfj/vits-piper-nl_BE-rdh-medium": _get_vits_piper,
881
+ "csukuangfj/vits-piper-nl_BE-rdh-x_low": _get_vits_piper,
882
+ # "csukuangfj/vits-piper-nl_NL-mls-medium": _get_vits_piper,
883
+ # "csukuangfj/vits-piper-nl_NL-mls_5809-low": _get_vits_piper,
884
+ # "csukuangfj/vits-piper-nl_NL-mls_7432-low": _get_vits_piper,
885
+ }
886
+
887
+ norwegian_models = {
888
+ "csukuangfj/vits-piper-no_NO-talesyntese-medium": _get_vits_piper,
889
+ }
890
+
891
+ polish_models = {
892
+ "csukuangfj/vits-coqui-pl-mai_female": _get_vits_piper,
893
+ "csukuangfj/vits-piper-pl_PL-darkman-medium": _get_vits_piper,
894
+ "csukuangfj/vits-piper-pl_PL-gosia-medium": _get_vits_piper,
895
+ "csukuangfj/vits-piper-pl_PL-mc_speech-medium": _get_vits_piper,
896
+ # "csukuangfj/vits-piper-pl_PL-mls_6892-low": _get_vits_piper,
897
+ "csukuangfj/vits-mimic3-pl_PL-m-ailabs_low": _get_vits_piper,
898
+ }
899
+
900
+ portuguese_models = {
901
+ "csukuangfj/vits-coqui-pt-cv": _get_vits_piper,
902
+ "csukuangfj/vits-piper-pt_BR-edresson-low": _get_vits_piper,
903
+ "csukuangfj/vits-piper-pt_BR-faber-medium": _get_vits_piper,
904
+ "csukuangfj/vits-piper-pt_PT-tugao-medium": _get_vits_piper,
905
+ }
906
+
907
+ romanian_models = {
908
+ "csukuangfj/vits-coqui-ro-cv": _get_vits_piper,
909
+ "csukuangfj/vits-piper-ro_RO-mihai-medium": _get_vits_piper,
910
+ }
911
+
912
+
913
+ slovak_models = {
914
+ "csukuangfj/vits-coqui-sk-cv": _get_vits_piper,
915
+ "csukuangfj/vits-piper-sk_SK-lili-medium": _get_vits_piper,
916
+ }
917
+
918
+ serbian_models = {
919
+ "csukuangfj/vits-piper-sr_RS-serbski_institut-medium": _get_vits_piper,
920
+ }
921
+
922
+ swedish_models = {
923
+ "csukuangfj/vits-coqui-sv-cv": _get_vits_piper,
924
+ "csukuangfj/vits-piper-sv_SE-nst-medium": _get_vits_piper,
925
+ }
926
+
927
+ swahili_models = {
928
+ "csukuangfj/vits-piper-sw_CD-lanfrica-medium": _get_vits_piper,
929
+ }
930
+
931
+ turkish_models = {
932
+ "csukuangfj/vits-piper-tr_TR-dfki-medium": _get_vits_piper,
933
+ "csukuangfj/vits-piper-tr_TR-fahrettin-medium": _get_vits_piper,
934
+ "csukuangfj/vits-piper-tr_TR-fettah-medium|1 speaker": _get_vits_piper,
935
+ }
936
+
937
+ vietnamese_models = {
938
+ "csukuangfj/vits-piper-vi_VN-25hours_single-low": _get_vits_piper,
939
+ "csukuangfj/vits-piper-vi_VN-vais1000-medium": _get_vits_piper,
940
+ "csukuangfj/vits-piper-vi_VN-vivos-x_low": _get_vits_piper,
941
+ "csukuangfj/vits-mimic3-vi_VN-vais1000_low": _get_vits_piper,
942
+ }
943
+
944
+ bulgarian_models = {
945
+ "csukuangfj/vits-coqui-bg-cv": _get_vits_piper,
946
+ }
947
+
948
+ estonian_models = {
949
+ "csukuangfj/vits-coqui-et-cv": _get_vits_piper,
950
+ }
951
+
952
+ irish_models = {
953
+ "csukuangfj/vits-coqui-ga-cv": _get_vits_piper,
954
+ }
955
+
956
+ croatian_models = {
957
+ "csukuangfj/vits-coqui-hr-cv": _get_vits_piper,
958
+ }
959
+
960
+ lithuanian_models = {
961
+ "csukuangfj/vits-coqui-lt-cv": _get_vits_piper,
962
+ }
963
+
964
+ latvian_models = {
965
+ "csukuangfj/vits-piper-lv_LV-aivars-medium": _get_vits_piper,
966
+ "csukuangfj/vits-coqui-lv-cv": _get_vits_piper,
967
+ }
968
+
969
+ maltese_models = {
970
+ "csukuangfj/vits-coqui-mt-cv": _get_vits_piper,
971
+ }
972
+
973
+ slovenian_models = {
974
+ "csukuangfj/vits-piper-sl_SI-artur-medium": _get_vits_piper,
975
+ "csukuangfj/vits-coqui-sl-cv": _get_vits_piper,
976
+ }
977
+
978
+ # Bangla
979
+ bengali_models = {
980
+ "csukuangfj/vits-coqui-bn-custom_female": _get_vits_piper,
981
+ "csukuangfj/vits-mimic3-bn-multi_low": _get_vits_piper,
982
+ }
983
+
984
+ min_nan_models = {
985
+ "csukuangfj/vits-mms-nan": _get_vits_mms,
986
+ }
987
+
988
+ thai_models = {
989
+ "csukuangfj/vits-mms-tha": _get_vits_mms,
990
+ }
991
+
992
+ persian_models = {
993
+ "csukuangfj/vits-piper-fa_IR-amir-medium": _get_vits_piper,
994
+ "csukuangfj/vits-piper-fa_IR-gyro-medium": _get_vits_piper,
995
+ "csukuangfj/vits-mimic3-fa-haaniye_low": _get_vits_piper,
996
+ }
997
+
998
+ korean_models = {
999
+ "csukuangfj/vits-mimic3-ko_KO-kss_low": _get_vits_piper,
1000
+ }
1001
+
1002
+
1003
+ afrikaans_models = {
1004
+ "csukuangfj/vits-mimic3-af_ZA-google-nwu_low": _get_vits_piper,
1005
+ }
1006
+
1007
+ gujarati_models = {
1008
+ "csukuangfj/vits-mimic3-gu_IN-cmu-indic_low": _get_vits_piper,
1009
+ }
1010
+
1011
+ tswana_models = {
1012
+ "csukuangfj/vits-mimic3-tn_ZA-google-nwu_low": _get_vits_piper,
1013
+ }
1014
+
1015
+ welsh_models = {
1016
+ "csukuangfj/vits-piper-cy_GB-gwryw_gogleddol-medium|1 speaker": _get_vits_piper,
1017
+ }
1018
+
1019
+ language_to_models = {
1020
+ "English": list(english_models.keys()),
1021
+ "Chinese (Mandarin, 普通话)": list(chinese_models.keys()),
1022
+ "Chinese+English": list(chinese_english_models.keys()),
1023
+ "Persian+English": list(persian_english_models.keys()),
1024
+ "Cantonese (粤语)": list(cantonese_models.keys()),
1025
+ "Min-nan (闽南话)": list(min_nan_models.keys()),
1026
+ "Arabic": list(arabic_models.keys()),
1027
+ "Afrikaans": list(afrikaans_models.keys()),
1028
+ "Bengali": list(bengali_models.keys()),
1029
+ "Bulgarian": list(bulgarian_models.keys()),
1030
+ "Catalan": list(catalan_models.keys()),
1031
+ "Croatian": list(croatian_models.keys()),
1032
+ "Czech": list(czech_models.keys()),
1033
+ "Danish": list(danish_models.keys()),
1034
+ "Dutch": list(dutch_models.keys()),
1035
+ "Estonian": list(estonian_models.keys()),
1036
+ "Finnish": list(finnish_models.keys()),
1037
+ "French": list(french_models.keys()),
1038
+ "Georgian": list(georgian_models.keys()),
1039
+ "German": list(german_models.keys()),
1040
+ "Greek": list(greek_models.keys()),
1041
+ "Gujarati": list(gujarati_models.keys()),
1042
+ "Hungarian": list(hungarian_models.keys()),
1043
+ "Icelandic": list(icelandic_models.keys()),
1044
+ "Irish": list(irish_models.keys()),
1045
+ "Italian": list(italian_models.keys()),
1046
+ "Kazakh": list(kazakh_models.keys()),
1047
+ "Korean": list(korean_models.keys()),
1048
+ "Latvian": list(latvian_models.keys()),
1049
+ "Lithuanian": list(lithuanian_models.keys()),
1050
+ "Luxembourgish": list(luxembourgish_models.keys()),
1051
+ "Maltese": list(maltese_models.keys()),
1052
+ "Nepali": list(nepali_models.keys()),
1053
+ "Norwegian": list(norwegian_models.keys()),
1054
+ "Persian": list(persian_models.keys()),
1055
+ "Polish": list(polish_models.keys()),
1056
+ "Portuguese": list(portuguese_models.keys()),
1057
+ "Romanian": list(romanian_models.keys()),
1058
+ "Russian": list(russian_models.keys()),
1059
+ "Serbian": list(serbian_models.keys()),
1060
+ "Slovak": list(slovak_models.keys()),
1061
+ "Slovenian": list(slovenian_models.keys()),
1062
+ "Spanish": list(spanish_models.keys()),
1063
+ "Swahili": list(swahili_models.keys()),
1064
+ "Swedish": list(swedish_models.keys()),
1065
+ "Thai": list(thai_models.keys()),
1066
+ "Tswana": list(tswana_models.keys()),
1067
+ "Turkish": list(turkish_models.keys()),
1068
+ "Ukrainian": list(ukrainian_models.keys()),
1069
+ "Vietnamese": list(vietnamese_models.keys()),
1070
+ "Welsh": list(welsh_models.keys()),
1071
+ }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.10.46/sherpa_onnx-1.10.46-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
2
+ #sherpa-onnx>=1.10.42
3
+
4
+ soundfile