Spaces:
Sleeping
Sleeping
File size: 2,034 Bytes
3bce9df 9d55f3c d10ee69 9d55f3c 44a44c5 9d55f3c 3bce9df 9d55f3c d10ee69 9d55f3c 5dce60b 44a44c5 9d55f3c 82446c0 92b16ae caf892f 92b16ae 82446c0 f172d10 120db4e f172d10 44a44c5 f172d10 44a44c5 f172d10 44a44c5 9628add 44a44c5 82446c0 f172d10 82446c0 44a44c5 d10ee69 f172d10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import torch, gradio as gr, soundfile as sf, tempfile
from transformers import VitsModel, AutoProcessor
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Checkpoint identifier handed to ``from_pretrained`` for each voice.
MODELS = {
    "Male": "vlntlbr/vits-greek-male",
    "Female": "vlntlbr/vits-greek-female",
}

# One VITS model per voice, loaded at start-up and moved to DEVICE.
tts = {
    voice: VitsModel.from_pretrained(checkpoint).to(DEVICE)
    for voice, checkpoint in MODELS.items()
}

# The processor loaded from the same checkpoint, keyed by the same voice name.
proc = {
    voice: AutoProcessor.from_pretrained(checkpoint)
    for voice, checkpoint in MODELS.items()
}
# Path of the reference audio clip shown in the UI for each voice.
SNIPPETS = {
    "Male": "audio/male_ref.mp3",
    "Female": "audio/female_ref.mp3",
}
def synth(text, speaker):
    """Synthesise speech for ``text`` with the selected voice.

    Args:
        text: The text to speak; it is passed to the speaker's processor.
        speaker: Key into ``proc`` / ``tts`` selecting the voice.

    Returns:
        Path of a temporary ``.mp3`` file containing the generated audio.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Show a readable message in the UI instead of running the model on
        # blank input.
        raise gr.Error("Please enter some Greek text to synthesise.")

    model = tts[speaker]
    inputs = proc[speaker](text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        wav = model(**inputs).waveform.squeeze().cpu()

    # Only the unique path is needed. Closing the handle immediately avoids
    # leaking one descriptor per request and lets soundfile reopen the file
    # by name (which Windows refuses while the handle is still open).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        out_path = tmp.name

    # Take the sampling rate from the loaded checkpoint rather than assuming
    # 16 kHz, so the audio is not pitch/speed shifted if a model differs.
    sf.write(out_path, wav.numpy(), model.config.sampling_rate)
    return out_path
# Example prompts for the demo; each row is [Greek text, speaker name].
examples = [
    ["Πες μου πώς σε λένε.", "Male"],
    ["Τι μας προτείνεις;", "Female"],
    ["Είναι τόσο όμορφα έξω!", "Male"],
    ["Η οικογένεια είναι μαζεμένη στην τραπεζαρία", "Female"],
]
# Extra CSS passed to gr.Blocks: centre the page heading and the Markdown
# paragraphs. Built line by line so the resulting string is explicit.
custom_css = (
    "\n"
    "h1, .gr-markdown h1 {\n"
    "text-align: center;\n"
    "}\n"
    ".gr-markdown p {\n"
    "text-align: center;\n"
    "}\n"
)
# Assemble the demo page: a voice picker with its reference clip, a text box
# with example prompts, and a player for the synthesised audio.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation was lost — confirm that the Row holds only the speaker selector
# and the reference player.
with gr.Blocks(title="Greek TTS (male / female)", css=custom_css) as demo:
    gr.Markdown(
        "# Greek TTS Demo\n"
        "Choose a speaker, listen to a reference clip, then enter Greek text."
    )

    with gr.Row():
        speaker = gr.Radio(["Male", "Female"], value="Male", label="Speaker")
        ref_aud = gr.Audio(SNIPPETS["Male"], interactive=False, label="Reference")

    # Swap the reference clip whenever the selected speaker changes.
    speaker.change(lambda s: gr.update(value=SNIPPETS[s]), speaker, ref_aud)

    text_in = gr.Textbox(label="Greek text", placeholder="Γράψε κάτι…")
    # Each example row supplies a value for the text box and the speaker.
    gr.Examples(examples=examples, inputs=[text_in, speaker])

    out_aud = gr.Audio(label="Synthesised speech")
    # Call synth with the current text and speaker; its result feeds out_aud.
    gr.Button("Generate!").click(synth, [text_in, speaker], out_aud)

# Launch the app (the stray trailing "|" from the pasted copy is removed; it
# made this statement a syntax error).
demo.launch()