# Spaces: Sleeping  (page-status text captured with this listing; not part of the program)
import torch, gradio as gr, soundfile as sf, tempfile | |
from transformers import VitsModel, AutoProcessor | |
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Speaker label -> identifier handed to ``from_pretrained``.
MODELS = {
    "Male": "vlntlbr/vits-greek-male",
    "Female": "vlntlbr/vits-greek-female",
}

# One model (moved to DEVICE) and one processor per speaker, built once when
# this module is executed.
tts = {
    label: VitsModel.from_pretrained(repo_id).to(DEVICE)
    for label, repo_id in MODELS.items()
}
proc = {
    label: AutoProcessor.from_pretrained(repo_id)
    for label, repo_id in MODELS.items()
}

# Speaker label -> path of the reference clip shown next to the selector.
SNIPPETS = {
    "Male": "audio/male_ref.mp3",
    "Female": "audio/female_ref.mp3",
}
def synth(text, speaker):
    """Synthesise speech for ``text`` with the selected speaker's model.

    Args:
        text: Text to speak.
        speaker: Key into ``proc`` / ``tts`` selecting the voice.

    Returns:
        Path of a temporary ``.mp3`` file containing the generated audio.
        The file is created with ``delete=False``, so it is not removed
        when this function returns.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    # Reject blank input up front instead of running the model on nothing.
    if not text or not text.strip():
        raise gr.Error("Please enter some Greek text to synthesise.")

    model = tts[speaker]
    inputs = proc[speaker](text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        wav = model(**inputs).waveform.squeeze().cpu()

    # Only the path is needed: close the handle immediately so the descriptor
    # is not leaked before soundfile writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        out_path = tmp.name

    # Use the rate declared by the model's config rather than a hard-coded
    # 16000, so the audio is tagged with the rate it was generated at.
    sf.write(out_path, wav.numpy(), model.config.sampling_rate)
    return out_path
# Example prompts offered in the UI: each row is [Greek text, speaker label].
examples = [
    ["Πες μου πώς σε λένε.", "Male"],
    ["Τι μας προτείνεις;", "Female"],
    ["Είναι τόσο όμορφα έξω!", "Male"],
    ["Η οικογένεια είναι μαζεμένη στην τραπεζαρία", "Female"],
]

# Stylesheet for the page: centres the heading and the Markdown paragraphs.
custom_css = """
h1, .gr-markdown h1 {
    text-align: center;
}
.gr-markdown p {
    text-align: center;
}
"""
def _reference_for(choice):
    """Return an update that points the reference player at ``choice``'s clip."""
    return gr.update(value=SNIPPETS[choice])


# NOTE(review): the listing had no indentation, so the nesting below is
# reconstructed — confirm which components belong inside the Row.
with gr.Blocks(title="Greek TTS (male / female)", css=custom_css) as demo:
    gr.Markdown(
        "# Greek TTS Demo\n"
        "Choose a speaker, listen to a reference clip, then enter Greek text."
    )

    # Speaker selector next to that speaker's reference clip.
    with gr.Row():
        speaker = gr.Radio(["Male", "Female"], value="Male", label="Speaker")
        ref_aud = gr.Audio(SNIPPETS["Male"], interactive=False, label="Reference")

    # Swap the reference clip whenever the selected speaker changes.
    speaker.change(fn=_reference_for, inputs=speaker, outputs=ref_aud)

    text_in = gr.Textbox(label="Greek text", placeholder="Γράψε κάτι…")
    gr.Examples(examples=examples, inputs=[text_in, speaker])
    out_aud = gr.Audio(label="Synthesised speech")

    # Clicking the button runs synth on the text and speaker and plays the result.
    generate_btn = gr.Button("Generate!")
    generate_btn.click(fn=synth, inputs=[text_in, speaker], outputs=out_aud)

demo.launch()