Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,23 @@
|
|
1 |
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
import os
|
4 |
import time
|
@@ -20,17 +39,30 @@ def MyPrint(s):
|
|
20 |
title = "# Next-gen Kaldi: Text-to-speech (TTS)"
|
21 |
|
22 |
description = """
|
23 |
-
This space shows how to convert text to speech with Next-gen Kaldi
|
24 |
|
25 |
-
|
26 |
|
27 |
-
|
28 |
|
29 |
- <https://github.com/k2-fsa/sherpa-onnx>
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
"""
|
33 |
|
|
|
|
|
34 |
css = """
|
35 |
.result {display:flex;flex-direction:column}
|
36 |
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
|
@@ -41,28 +73,43 @@ css = """
|
|
41 |
examples = [
|
42 |
[
|
43 |
"English",
|
44 |
-
"csukuangfj/vits-
|
45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
0,
|
47 |
1.0,
|
48 |
],
|
49 |
[
|
50 |
"English",
|
51 |
-
"csukuangfj/vits-
|
52 |
-
"
|
53 |
0,
|
54 |
1.0,
|
55 |
],
|
56 |
[
|
57 |
"English",
|
58 |
-
"csukuangfj/vits-
|
59 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
0,
|
61 |
1.0,
|
62 |
],
|
63 |
]
|
64 |
|
65 |
|
|
|
66 |
def update_model_dropdown(language: str):
|
67 |
if language in language_to_models:
|
68 |
choices = language_to_models[language]
|
@@ -127,6 +174,7 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
|
|
127 |
|
128 |
demo = gr.Blocks(css=css)
|
129 |
|
|
|
130 |
with demo:
|
131 |
gr.Markdown(title)
|
132 |
language_choices = list(language_to_models.keys())
|
@@ -134,13 +182,13 @@ with demo:
|
|
134 |
language_radio = gr.Radio(
|
135 |
label="Language",
|
136 |
choices=language_choices,
|
137 |
-
value=
|
138 |
)
|
139 |
|
140 |
model_dropdown = gr.Dropdown(
|
141 |
-
choices=language_to_models[
|
142 |
label="Select a model",
|
143 |
-
value=language_to_models[
|
144 |
)
|
145 |
|
146 |
language_radio.change(
|
@@ -164,7 +212,7 @@ with demo:
|
|
164 |
lines=1,
|
165 |
max_lines=1,
|
166 |
value="0",
|
167 |
-
placeholder="Speaker ID. Valid only for
|
168 |
)
|
169 |
|
170 |
input_speed = gr.Slider(
|
@@ -227,4 +275,6 @@ def download_espeak_ng_data():
|
|
227 |
|
228 |
if __name__ == "__main__":
|
229 |
download_espeak_ng_data()
|
|
|
|
|
230 |
demo.launch()
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
+
#
|
3 |
+
# Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
|
4 |
+
#
|
5 |
+
# See LICENSE for clarification regarding multiple authors
|
6 |
+
#
|
7 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
8 |
+
# you may not use this file except in compliance with the License.
|
9 |
+
# You may obtain a copy of the License at
|
10 |
+
#
|
11 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12 |
+
#
|
13 |
+
# Unless required by applicable law or agreed to in writing, software
|
14 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16 |
+
# See the License for the specific language governing permissions and
|
17 |
+
# limitations under the License.
|
18 |
+
|
19 |
+
# References:
|
20 |
+
# https://gradio.app/docs/#dropdown
|
21 |
|
22 |
import os
|
23 |
import time
|
|
|
39 |
title = "# Next-gen Kaldi: Text-to-speech (TTS)"
|
40 |
|
41 |
description = """
|
42 |
+
This space shows how to convert text to speech with Next-gen Kaldi.
|
43 |
|
44 |
+
It is running on CPU within a docker container provided by Hugging Face.
|
45 |
|
46 |
+
See more information by visiting the following links:
|
47 |
|
48 |
- <https://github.com/k2-fsa/sherpa-onnx>
|
49 |
+
|
50 |
+
If you want to deploy it locally, please see
|
51 |
+
<https://k2-fsa.github.io/sherpa/>
|
52 |
+
|
53 |
+
If you want to use Android APKs, please see
|
54 |
+
<https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>
|
55 |
+
|
56 |
+
If you want to use Android text-to-speech engine APKs, please see
|
57 |
+
<https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>
|
58 |
+
|
59 |
+
If you want to download an all-in-one exe for Windows, please see
|
60 |
+
<https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
|
61 |
+
|
62 |
"""
|
63 |
|
64 |
+
# css style is copied from
|
65 |
+
# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
|
66 |
css = """
|
67 |
.result {display:flex;flex-direction:column}
|
68 |
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
|
|
|
73 |
examples = [
|
74 |
[
|
75 |
"English",
|
76 |
+
"csukuangfj/vits-en-us",
|
77 |
+
"Welcome to our next-generation text-to-speech demo. This is powered by Sherpa-ONNX.",
|
78 |
+
0,
|
79 |
+
1.0,
|
80 |
+
],
|
81 |
+
[
|
82 |
+
"English",
|
83 |
+
"csukuangfj/vits-en-us",
|
84 |
+
"Artificial intelligence is transforming industries with innovative voice technologies.",
|
85 |
0,
|
86 |
1.0,
|
87 |
],
|
88 |
[
|
89 |
"English",
|
90 |
+
"csukuangfj/vits-en-us",
|
91 |
+
"The quick brown fox jumps over the lazy dog. 1234567890.",
|
92 |
0,
|
93 |
1.0,
|
94 |
],
|
95 |
[
|
96 |
"English",
|
97 |
+
"csukuangfj/vits-en-us",
|
98 |
+
"Today is a great day to explore machine learning and natural language processing.",
|
99 |
+
0,
|
100 |
+
1.0,
|
101 |
+
],
|
102 |
+
[
|
103 |
+
"English",
|
104 |
+
"csukuangfj/vits-en-us",
|
105 |
+
"Call 911 in case of emergency. The meeting is scheduled for August 15th, 2025.",
|
106 |
0,
|
107 |
1.0,
|
108 |
],
|
109 |
]
|
110 |
|
111 |
|
112 |
+
|
113 |
def update_model_dropdown(language: str):
|
114 |
if language in language_to_models:
|
115 |
choices = language_to_models[language]
|
|
|
174 |
|
175 |
demo = gr.Blocks(css=css)
|
176 |
|
177 |
+
|
178 |
with demo:
|
179 |
gr.Markdown(title)
|
180 |
language_choices = list(language_to_models.keys())
|
|
|
182 |
language_radio = gr.Radio(
|
183 |
label="Language",
|
184 |
choices=language_choices,
|
185 |
+
value=language_choices[0],
|
186 |
)
|
187 |
|
188 |
model_dropdown = gr.Dropdown(
|
189 |
+
choices=language_to_models[language_choices[0]],
|
190 |
label="Select a model",
|
191 |
+
value=language_to_models[language_choices[0]][0],
|
192 |
)
|
193 |
|
194 |
language_radio.change(
|
|
|
212 |
lines=1,
|
213 |
max_lines=1,
|
214 |
value="0",
|
215 |
+
placeholder="Speaker ID. Valid only for multi-speaker model",
|
216 |
)
|
217 |
|
218 |
input_speed = gr.Slider(
|
|
|
275 |
|
276 |
if __name__ == "__main__":
|
277 |
download_espeak_ng_data()
|
278 |
+
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
279 |
+
|
280 |
demo.launch()
|