# StyleTTS2-lite-vi — Gradio Space demo app
# (HuggingFace Spaces status-page residue removed from scraped source.)
import gradio as gr | |
import subprocess | |
import os | |
import sys | |
import soundfile as sf | |
import numpy as np | |
import torch.cuda | |
# --- Model setup -------------------------------------------------------------
# Clone the StyleTTS2-lite-vi repository (inference code + model weights) on
# first run, then import its inference entry point and load the model.
repo_url = "https://huggingface.co/dangtr0408/StyleTTS2-lite-vi"
repo_dir = "StyleTTS2-lite-vi"
if not os.path.exists(repo_dir):
    # check=True: fail loudly here if the clone fails, rather than with a
    # confusing ImportError on the `from inference import ...` line below.
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
sys.path.append(os.path.abspath(repo_dir))
from inference import StyleTTS2

# Prefer GPU when available; the model also runs on CPU (slower).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config_path = os.path.join(repo_dir, "Models", "config.yml")
models_path = os.path.join(repo_dir, "Models", "model_vi_en.pth")
model = StyleTTS2(config_path, models_path).to(device)
# Core inference function | |
def process_inputs(text_prompt, reference_audio_paths,
                   n_merge, randomness, smooth_dur,
                   denoise, t_denoise, split_dur):
    """Synthesize speech for `text_prompt` in the style of the reference audio.

    Args:
        text_prompt: Text to synthesize.
        reference_audio_paths: List of file paths to reference audio clips
            (may be None when the user uploaded nothing).
        n_merge: Minimum number of words to merge per synthesis chunk.
        randomness: Style randomness in [0, 1].
        smooth_dur: Duration smoothing factor in [0, 1].
        denoise: Whether to denoise the reference audio.
        t_denoise: Denoise strength in [0, 1].
        split_dur: Duration (s) used to split the reference audio.

    Returns:
        Path to the written 24 kHz WAV file ("output.wav").

    Raises:
        gr.Error: If no reference audio was uploaded.
    """
    # Without this guard, a missing upload arrives as None and the
    # enumerate() below raises an opaque TypeError.
    if not reference_audio_paths:
        raise gr.Error("Please upload at least one reference audio file.")

    # Build a speaker table keyed id_1..id_N; all speakers are treated as
    # Vietnamese at a fixed 1.1x speed (matches the original demo behavior).
    speakers = {
        f"id_{i}": {"path": path, "lang": "vi", "speed": 1.1}
        for i, path in enumerate(reference_audio_paths, 1)
    }

    # Synthesize audio; "[id_1]" selects the first uploaded speaker as default.
    r = model.generate(
        text_prompt, speakers, denoise, t_denoise,
        split_dur, "[id_1]", n_merge, randomness, smooth_dur
    )

    # Peak-normalize, guarding against all-zero output (division by zero
    # would otherwise yield NaNs and a corrupt WAV).
    peak = np.abs(r).max()
    if peak > 0:
        r = r / peak
    sf.write("output.wav", r, samplerate=24000)
    return "output.wav"
# CSS injected into the Blocks UI: components tagged elem_id="custom-box"
# (the text prompt and the file-upload box) are stretched to ~300px so the
# two side-by-side inputs line up; the textarea fills its container.
custom_css = """
#custom-box {
    min-height: 300px !important;
    display: flex;
    align-items: center;
}
#custom-box textarea {
    min-height: 250px !important;
    height: 100% !important;
}
"""
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## StyleTTS2-lite-vi Demo")
    gr.Markdown("Upload a reference audio and input your text to synthesize speech with style control.")

    # Main inputs, side by side: prompt text and reference audio uploads.
    with gr.Row():
        text_prompt = gr.Textbox(
            label="Text Prompt",
            placeholder="Enter your text here...",
            elem_id="custom-box",
        )
        reference_audios = gr.File(
            label="Reference Audios",
            file_types=[".wav", ".mp3", ".flac"],
            file_count="multiple",
            elem_id="custom-box",
        )

    # Synthesis knobs, collapsed by default.
    with gr.Accordion("Advanced Settings", open=False):
        denoise = gr.Checkbox(label="Apply Denoising", value=True)
        t_denoise = gr.Slider(0.0, 1.0, value=0.3, label="Denoise Strength")
        n_merge = gr.Slider(1, 30, value=16, label="Min Words to Merge")
        randomness = gr.Slider(0.0, 1.0, value=0.2, label="Randomness")
        smooth_dur = gr.Slider(0.0, 1.0, value=0.15, label="Smooth Duration")
        split_dur = gr.Slider(0, 10, step=1, value=3, label="Split Ref Audio Duration")

    synth_btn = gr.Button("Synthesize")
    audio_out = gr.Audio(label="Synthesized Audio", type="filepath")

    # Input order must match the process_inputs() signature exactly.
    synth_btn.click(
        fn=process_inputs,
        inputs=[text_prompt, reference_audios,
                n_merge, randomness, smooth_dur,
                denoise, t_denoise, split_dur],
        outputs=audio_out,
    )

demo.launch()