vlntlbr's picture
Upload app.py with huggingface_hub
caf892f verified
import torch, gradio as gr, soundfile as sf, tempfile
from transformers import VitsModel, AutoProcessor
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Speaker label -> checkpoint identifier passed to ``from_pretrained``.
MODELS = {
    "Male": "vlntlbr/vits-greek-male",
    "Female": "vlntlbr/vits-greek-female",
}


def _load_voice(repo_id):
    """Load the VITS checkpoint named by *repo_id* and move it to DEVICE."""
    voice_model = VitsModel.from_pretrained(repo_id)
    return voice_model.to(DEVICE)


# Both lookup tables are filled once at import time and share MODELS' keys:
# ``tts`` holds the models, ``proc`` the processors that prepare their input.
tts = {label: _load_voice(repo_id) for label, repo_id in MODELS.items()}
proc = {
    label: AutoProcessor.from_pretrained(repo_id)
    for label, repo_id in MODELS.items()
}

# Speaker label -> path of the audio clip used as that speaker's reference.
SNIPPETS = {
    "Male": "audio/male_ref.mp3",
    "Female": "audio/female_ref.mp3",
}
def synth(text, speaker):
    """Synthesise speech for ``text`` with the selected voice.

    Args:
        text: Text to speak; handed unchanged to the speaker's processor.
        speaker: Key into the module-level ``proc`` and ``tts`` tables.

    Returns:
        Path of a newly created temporary ``.mp3`` file containing the
        generated waveform. The file is created with ``delete=False``, so it
        persists after this call returns and is not removed here.

    Raises:
        KeyError: If ``speaker`` is not a key of ``proc`` / ``tts``.
    """
    model = tts[speaker]
    inputs = proc[speaker](text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        wav = model(**inputs).waveform.squeeze().cpu()

    # Reserve a unique path, then close our own handle before soundfile
    # reopens the file by name: this avoids leaking one descriptor per request
    # and the "file already open" failure on platforms that forbid reopening.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        out_path = tmp.name

    # Prefer the sampling rate recorded in the checkpoint's config so playback
    # speed stays correct; fall back to the previous fixed 16 kHz otherwise.
    sample_rate = getattr(model.config, "sampling_rate", 16000)
    sf.write(out_path, wav.numpy(), sample_rate)
    return out_path
# Sample prompts offered in the UI, as (Greek text, speaker label) pairs.
_EXAMPLE_PAIRS = (
    ("Πες μου πώς σε λένε.", "Male"),
    ("Τι μας προτείνεις;", "Female"),
    ("Είναι τόσο όμορφα έξω!", "Male"),
    ("Η οικογένεια είναι μαζεμένη στην τραπεζαρία", "Female"),
)
# Exposed as a list of [text, speaker] lists.
examples = [[phrase, voice] for phrase, voice in _EXAMPLE_PAIRS]

# Stylesheet for the page: centres top-level headings and markdown paragraphs.
# The empty first and last entries keep the leading and trailing newline.
custom_css = "\n".join(
    [
        "",
        "h1, .gr-markdown h1 {",
        "text-align: center;",
        "}",
        ".gr-markdown p {",
        "text-align: center;",
        "}",
        "",
    ]
)
# Page layout: intro text, a speaker picker beside its reference clip, a text
# box with clickable examples, the output player and the trigger button.
with gr.Blocks(title="Greek TTS (male / female)", css=custom_css) as demo:
    intro_md = (
        "# Greek TTS Demo\n"
        "Choose a speaker, listen to a reference clip, then enter Greek text."
    )
    gr.Markdown(intro_md)

    with gr.Row():
        speaker = gr.Radio(
            choices=["Male", "Female"],
            value="Male",
            label="Speaker",
        )
        ref_aud = gr.Audio(
            value=SNIPPETS["Male"],
            interactive=False,
            label="Reference",
        )

    # Switching speaker swaps the reference player over to that speaker's clip.
    speaker.change(
        fn=lambda choice: gr.update(value=SNIPPETS[choice]),
        inputs=speaker,
        outputs=ref_aud,
    )

    text_in = gr.Textbox(label="Greek text", placeholder="Γράψε κάτι…")
    gr.Examples(examples=examples, inputs=[text_in, speaker])
    out_aud = gr.Audio(label="Synthesised speech")

    # The button passes the text and speaker to ``synth`` and plays its result.
    generate_btn = gr.Button("Generate!")
    generate_btn.click(fn=synth, inputs=[text_in, speaker], outputs=out_aud)

# Start the web server for the demo.
demo.launch()