Spaces:

deep-div
/

Text-to-Speech-TTS

Running

App Files Community

deep-div committed 16 days ago

Commit

1ef2f3c

verified ·

1 Parent(s): df4f8a7

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -15

app.py CHANGED Viewed

@@ -1,4 +1,23 @@
 #!/usr/bin/env python3
 import os
 import time
@@ -20,17 +39,30 @@ def MyPrint(s):
 title = "# Next-gen Kaldi: Text-to-speech (TTS)"
 description = """
-This space shows how to convert text to speech with Next-gen Kaldi (Sherpa-ONNX).
-Running on CPU in a Docker container from Hugging Face.
-Useful links:
 - <https://github.com/k2-fsa/sherpa-onnx>
-- <https://k2-fsa.github.io/sherpa/>
-- <https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
 """
 css = """
 .result {display:flex;flex-direction:column}
 .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
@@ -41,28 +73,43 @@ css = """
 examples = [
     [
         "English",
-        "csukuangfj/vits-piper-en_US-libritts-rhea|1 speaker",
-        "Hello! This is a test of the text-to-speech demo powered by Sherpa-ONNX.",
         0,
         1.0,
     ],
     [
         "English",
-        "csukuangfj/vits-piper-en_GB-alba|1 speaker",
-        "Welcome to the world of AI voice generation. Enjoy the experience.",
         0,
         1.0,
     ],
     [
         "English",
-        "csukuangfj/vits-piper-en_US-lessac-medium|1 speaker",
-        "Real-time voice synthesis helps developers build smarter applications.",
         0,
         1.0,
     ],
 ]
 def update_model_dropdown(language: str):
     if language in language_to_models:
         choices = language_to_models[language]
@@ -127,6 +174,7 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
 demo = gr.Blocks(css=css)
 with demo:
     gr.Markdown(title)
     language_choices = list(language_to_models.keys())
@@ -134,13 +182,13 @@ with demo:
     language_radio = gr.Radio(
         label="Language",
         choices=language_choices,
-        value="English",  # Default language
     )
     model_dropdown = gr.Dropdown(
-        choices=language_to_models["English"],  # Default model list
         label="Select a model",
-        value=language_to_models["English"][0],  # Default model
     )
     language_radio.change(
@@ -164,7 +212,7 @@ with demo:
                 lines=1,
                 max_lines=1,
                 value="0",
-                placeholder="Speaker ID. Valid only for multi-speaker model",
             )
             input_speed = gr.Slider(
@@ -227,4 +275,6 @@ def download_espeak_ng_data():
 if __name__ == "__main__":
     download_espeak_ng_data()
     demo.launch()

 #!/usr/bin/env python3
+#
+# Copyright      2022-2023  Xiaomi Corp.        (authors: Fangjun Kuang)
+#
+# See LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# References:
+# https://gradio.app/docs/#dropdown
 import os
 import time
 title = "# Next-gen Kaldi: Text-to-speech (TTS)"
 description = """
+This space shows how to convert text to speech with Next-gen Kaldi.
+It is running on CPU within a docker container provided by Hugging Face.
+See more information by visiting the following links:
 - <https://github.com/k2-fsa/sherpa-onnx>
+If you want to deploy it locally, please see
+<https://k2-fsa.github.io/sherpa/>
+If you want to use Android APKs, please see
+<https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>
+If you want to use Android text-to-speech engine APKs, please see
+<https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>
+If you want to download an all-in-one exe for Windows, please see
+<https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
 """
+# css style is copied from
+# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
 css = """
 .result {display:flex;flex-direction:column}
 .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
 examples = [
     [
         "English",
+        "csukuangfj/vits-en-us",
+        "Welcome to our next-generation text-to-speech demo. This is powered by Sherpa-ONNX.",
+        0,
+        1.0,
+    ],
+    [
+        "English",
+        "csukuangfj/vits-en-us",
+        "Artificial intelligence is transforming industries with innovative voice technologies.",
         0,
         1.0,
     ],
     [
         "English",
+        "csukuangfj/vits-en-us",
+        "The quick brown fox jumps over the lazy dog. 1234567890.",
         0,
         1.0,
     ],
     [
         "English",
+        "csukuangfj/vits-en-us",
+        "Today is a great day to explore machine learning and natural language processing.",
+        0,
+        1.0,
+    ],
+    [
+        "English",
+        "csukuangfj/vits-en-us",
+        "Call 911 in case of emergency. The meeting is scheduled for August 15th, 2025.",
         0,
         1.0,
     ],
 ]
 def update_model_dropdown(language: str):
     if language in language_to_models:
         choices = language_to_models[language]
 demo = gr.Blocks(css=css)
 with demo:
     gr.Markdown(title)
     language_choices = list(language_to_models.keys())
     language_radio = gr.Radio(
         label="Language",
         choices=language_choices,
+        value=language_choices[0],
     )
     model_dropdown = gr.Dropdown(
+        choices=language_to_models[language_choices[0]],
         label="Select a model",
+        value=language_to_models[language_choices[0]][0],
     )
     language_radio.change(
                 lines=1,
                 max_lines=1,
                 value="0",
+                placeholder="Speaker ID. Valid only for mult-speaker model",
             )
             input_speed = gr.Slider(
 if __name__ == "__main__":
     download_espeak_ng_data()
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
     demo.launch()