Update app.py
app.py CHANGED
@@ -210,6 +210,7 @@ def split_text_into_batches(text, max_chars=200, split_words=SPLIT_WORDS):
 
     return batches
 
+@spaces.GPU
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, progress=gr.Progress()):
     if exp_name == "F5-TTS":
         ema_model = F5TTS_ema_model
@@ -294,6 +295,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
 
     return (target_sample_rate, final_wave), spectrogram_path
 
+@spaces.GPU
 def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, custom_split_words=''):
     if not custom_split_words.strip():
         custom_words = [word.strip() for word in custom_split_words.split(',')]
@@ -342,7 +344,8 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, custom_s
 
     gr.Info(f"Generating audio using {exp_name} in {len(gen_text_batches)} batches")
     return infer_batch((audio, sr), ref_text, gen_text_batches, exp_name, remove_silence)
-
+
+@spaces.GPU
 def generate_podcast(script, speaker1_name, ref_audio1, ref_text1, speaker2_name, ref_audio2, ref_text2, exp_name, remove_silence):
     # Split the script into speaker blocks
     speaker_pattern = re.compile(f"^({re.escape(speaker1_name)}|{re.escape(speaker2_name)}):", re.MULTILINE)
@@ -678,7 +681,8 @@ with gr.Blocks() as app_emotional:
 
     # Output audio
     audio_output_emotional = gr.Audio(label="Synthesized Audio")
-
+
+    @spaces.GPU
     def generate_emotional_speech(
         regular_audio,
         regular_ref_text,
@@ -801,24 +805,4 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
     )
     gr.TabbedInterface([app_tts, app_podcast, app_emotional, app_credits], ["TTS", "Podcast", "Multi-Style", "Credits"])
 
-
-@click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
-@click.option("--host", "-H", default=None, help="Host to run the app on")
-@click.option(
-    "--share",
-    "-s",
-    default=False,
-    is_flag=True,
-    help="Share the app via Gradio share link",
-)
-@click.option("--api", "-a", default=True, is_flag=True, help="Allow API access")
-def main(port, host, share, api):
-    global app
-    print(f"Starting app...")
-    app.queue(api_open=api).launch(
-        server_name=host, server_port=port, share=share, show_api=api
-    )
-
-
-if __name__ == "__main__":
-    main()
+app.queue().launch()
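Taken together, the commit moves the app to the ZeroGPU pattern for Hugging Face Spaces: each function that runs model inference is wrapped with the spaces.GPU decorator so a GPU is attached only while that call executes, and the click-based main() entry point is replaced by launching the Gradio app directly with app.queue().launch(). Below is a minimal sketch of that pattern, not the actual F5-TTS code; the synthesize function is a placeholder standing in for the real inference call.

import gradio as gr
import spaces  # ZeroGPU helper package available on Hugging Face Spaces


@spaces.GPU  # a GPU is allocated only for the duration of this call
def synthesize(text):
    # placeholder for the real model call (e.g. F5-TTS batch inference)
    return f"synthesized: {text}"


with gr.Blocks() as app:
    text_in = gr.Textbox(label="Text")
    text_out = gr.Textbox(label="Result")
    text_in.submit(synthesize, inputs=text_in, outputs=text_out)

# no click CLI: host, port and sharing are controlled by the Spaces runtime
app.queue().launch()

One consequence of dropping the click options is that port, host, share, and API flags are no longer configurable from the command line; on Spaces that is fine, since the runtime supplies those settings itself.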