File size: 2,034 Bytes
3bce9df
 
 
 
 
9d55f3c
 
 
 
 
 
d10ee69
9d55f3c
44a44c5
 
9d55f3c
 
3bce9df
9d55f3c
 
d10ee69
9d55f3c
5dce60b
44a44c5
9d55f3c
 
82446c0
 
92b16ae
caf892f
92b16ae
 
82446c0
 
f172d10
120db4e
f172d10
 
 
 
 
 
 
 
 
44a44c5
 
f172d10
44a44c5
f172d10
44a44c5
 
 
 
9628add
44a44c5
82446c0
f172d10
82446c0
 
44a44c5
d10ee69
f172d10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import torch, gradio as gr, soundfile as sf, tempfile
from transformers import VitsModel, AutoProcessor

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Checkpoint identifier handed to ``from_pretrained`` for each selectable voice.
MODELS = dict(
    Male="vlntlbr/vits-greek-male",
    Female="vlntlbr/vits-greek-female",
)

# One synthesis model per voice, loaded once at import time and moved to DEVICE.
tts = {
    voice: VitsModel.from_pretrained(checkpoint).to(DEVICE)
    for voice, checkpoint in MODELS.items()
}

# The processor that prepares model inputs for each voice, keyed like ``tts``.
proc = {
    voice: AutoProcessor.from_pretrained(checkpoint)
    for voice, checkpoint in MODELS.items()
}

# Path of the reference clip shown in the UI for each voice.
SNIPPETS = dict(
    Male="audio/male_ref.mp3",
    Female="audio/female_ref.mp3",
)


def synth(text, speaker):
    """Synthesise speech for *text* with the selected voice.

    Args:
        text: The text to speak; it is passed to the speaker's processor
            unchanged.
        speaker: Key into the module-level ``tts`` and ``proc`` dicts
            (``"Male"`` or ``"Female"``).

    Returns:
        Path of a temporary ``.mp3`` file containing the generated audio.
        The file is created with ``delete=False``, so it is not removed
        automatically when this function returns.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    # Nothing to synthesise for blank input; tell the user instead of
    # running the model on it.
    if not text or not text.strip():
        raise gr.Error("Please enter some Greek text to synthesise.")

    model = tts[speaker]
    inputs = proc[speaker](text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        wav = model(**inputs).waveform.squeeze().cpu()

    # Only the file name is needed. Close the handle before writing so it is
    # not leaked and the file is not open twice while soundfile writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        out_path = tmp.name

    # Write at the rate the model was configured for rather than a
    # hard-coded 16 kHz, so a checkpoint with another rate plays correctly.
    sf.write(out_path, wav.numpy(), model.config.sampling_rate)
    return out_path

# Sample inputs offered in the UI: each entry is [Greek phrase, speaker].
examples = [
    [phrase, voice]
    for phrase, voice in (
        ("Πες μου πώς σε λένε.", "Male"),
        ("Τι μας προτείνεις;", "Female"),
        ("Είναι τόσο όμορφα έξω!", "Male"),
        ("Η οικογένεια είναι μαζεμένη στην τραπεζαρία", "Female"),
    )
]

# Stylesheet passed to gr.Blocks: centres the page heading and the
# paragraph text rendered by Markdown components.
custom_css = (
    "\n"
    "\n"
    "h1, .gr-markdown h1 {\n"
    "    text-align: center;\n"
    "}\n"
    ".gr-markdown p {\n"
    "    text-align: center;\n"
    "}\n"
)

def _reference_clip_update(selected):
    """Return an update that points the reference player at *selected*'s clip."""
    return gr.update(value=SNIPPETS[selected])


# Build the page: heading, speaker picker with its reference clip, text input,
# example phrases, and the output player wired to ``synth``.
with gr.Blocks(title="Greek TTS (male / female)", css=custom_css) as demo:
    intro_md = (
        "# Greek TTS Demo\n"
        "Choose a speaker, listen to a reference clip, then enter Greek text."
    )
    gr.Markdown(intro_md)

    with gr.Row():
        speaker = gr.Radio(["Male", "Female"], value="Male", label="Speaker")
        ref_aud = gr.Audio(SNIPPETS["Male"], interactive=False, label="Reference")
        # Swap the reference clip whenever a different speaker is chosen.
        speaker.change(fn=_reference_clip_update, inputs=speaker, outputs=ref_aud)

    text_in = gr.Textbox(label="Greek text", placeholder="Γράψε κάτι…")

    # Clicking an example fills in both the text box and the speaker choice.
    gr.Examples(examples=examples, inputs=[text_in, speaker])

    out_aud = gr.Audio(label="Synthesised speech")
    generate_btn = gr.Button("Generate!")
    generate_btn.click(fn=synth, inputs=[text_in, speaker], outputs=out_aud)

# Start the web server for the demo.
demo.launch()