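"""Gradio demo for StyleTTS2-lite-vi.

Clones the model repo from Hugging Face on first run, loads the
StyleTTS2 checkpoint, and exposes a simple text-to-speech UI with
multi-reference-audio style control.
"""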
import os
import subprocess
import sys

import gradio as gr
import numpy as np
import soundfile as sf
import torch
# Clone the model repo from Hugging Face on first run.
repo_url = "https://huggingface.co/dangtr0408/StyleTTS2-lite-vi"
repo_dir = "StyleTTS2-lite-vi"
if not os.path.exists(repo_dir):
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)

# Make the cloned repo importable and load the model.
sys.path.append(os.path.abspath(repo_dir))
from inference import StyleTTS2

device = "cuda" if torch.cuda.is_available() else "cpu"
config_path = os.path.join(repo_dir, "Models", "config.yml")
models_path = os.path.join(repo_dir, "Models", "model_vi_en.pth")
model = StyleTTS2(config_path, models_path).to(device)
# Core inference function
def process_inputs(text_prompt, reference_audio_paths,
                   n_merge, randomness, smooth_dur,
                   denoise, t_denoise, split_dur):
    # Register each uploaded reference audio as a speaker: id_1, id_2, ...
    speakers = {}
    for i, path in enumerate(reference_audio_paths, 1):
        speakers[f"id_{i}"] = {
            "path": path,
            "lang": "vi",
            "speed": 1.1,
        }

    # Synthesize audio; "[id_1]" is the default speaker tag.
    r = model.generate(
        text_prompt, speakers, denoise, t_denoise,
        split_dur, "[id_1]", n_merge, randomness, smooth_dur
    )

    # Peak-normalize, guarding against all-silent output.
    peak = np.abs(r).max()
    if peak > 0:
        r = r / peak
    sf.write("output.wav", r, samplerate=24000)
    return "output.wav"
custom_css = """
#custom-box {
    min-height: 300px !important;
    display: flex;
    align-items: center;
}
#custom-box textarea {
    min-height: 250px !important;
    height: 100% !important;
}
"""
# Gradio UI
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## StyleTTS2-lite-vi Demo")
    gr.Markdown("Upload one or more reference audios and enter your text to synthesize speech with style control.")

    with gr.Row():
        text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter your text here...", elem_id="custom-box")
        reference_audios = gr.File(label="Reference Audios", file_types=[".wav", ".mp3", ".flac"], file_count="multiple", elem_id="custom-box")

    # Parameters
    with gr.Accordion("Advanced Settings", open=False):
        denoise = gr.Checkbox(label="Apply Denoising", value=True)
        t_denoise = gr.Slider(0.0, 1.0, value=0.3, label="Denoise Strength")
        n_merge = gr.Slider(1, 30, value=16, label="Min Words to Merge")
        randomness = gr.Slider(0.0, 1.0, value=0.2, label="Randomness")
        smooth_dur = gr.Slider(0.0, 1.0, value=0.15, label="Smooth Duration")
        split_dur = gr.Slider(0, 10, step=1, value=3, label="Split Ref Audio Duration")

    submit_button = gr.Button("Synthesize")
    synthesized_audio = gr.Audio(label="Synthesized Audio", type="filepath")

    # Wire the button to inference; input order must match process_inputs.
    submit_button.click(
        fn=process_inputs,
        inputs=[
            text_prompt,
            reference_audios,
            n_merge,
            randomness,
            smooth_dur,
            denoise,
            t_denoise,
            split_dur,
        ],
        outputs=synthesized_audio,
    )
demo.launch()
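# Note: on a Hugging Face Space the launch above is picked up automatically;
# when running locally, demo.launch(share=True) can also serve a temporary
# public link.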