Spaces:
Sleeping
Sleeping
Rasmus Lellep
committed on
Commit
·
b137cc2
1
Parent(s):
1e4fe3c
working new gradio version, added more example clips
Browse files
- README.md +1 -1
- app.py +25 -29
- examples/female.wav +3 -0
- examples/male.wav +3 -0
- requirements.txt +3 -3
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🦀
|
|
4 |
colorFrom: blue
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
python_version: 3.11
|
9 |
app_file: app.py
|
10 |
pinned: false
|
|
|
4 |
colorFrom: blue
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.41.0
|
8 |
python_version: 3.11
|
9 |
app_file: app.py
|
10 |
pinned: false
|
app.py
CHANGED
@@ -416,9 +416,6 @@ def predict(
|
|
416 |
None,
|
417 |
)
|
418 |
return (
|
419 |
-
gr.make_waveform(
|
420 |
-
audio="output.wav",
|
421 |
-
),
|
422 |
"output.wav",
|
423 |
metrics_text,
|
424 |
speaker_wav,
|
@@ -471,7 +468,7 @@ examples = [
|
|
471 |
[
|
472 |
"Once when I was six years old I saw a magnificent picture",
|
473 |
"en",
|
474 |
-
"examples/
|
475 |
None,
|
476 |
False,
|
477 |
False,
|
@@ -481,7 +478,7 @@ examples = [
|
|
481 |
[
|
482 |
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
483 |
"fr",
|
484 |
-
"examples/
|
485 |
None,
|
486 |
False,
|
487 |
False,
|
@@ -491,7 +488,7 @@ examples = [
|
|
491 |
[
|
492 |
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
493 |
"de",
|
494 |
-
"examples/
|
495 |
None,
|
496 |
False,
|
497 |
False,
|
@@ -501,7 +498,7 @@ examples = [
|
|
501 |
[
|
502 |
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
503 |
"es",
|
504 |
-
"examples/
|
505 |
None,
|
506 |
False,
|
507 |
False,
|
@@ -511,7 +508,7 @@ examples = [
|
|
511 |
[
|
512 |
"Kunagi, kui olin kuueaastane, nägin ma ühte imelist pilti",
|
513 |
"et",
|
514 |
-
"examples/
|
515 |
None,
|
516 |
False,
|
517 |
False,
|
@@ -521,7 +518,7 @@ examples = [
|
|
521 |
[
|
522 |
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
523 |
"pt",
|
524 |
-
"examples/
|
525 |
None,
|
526 |
False,
|
527 |
False,
|
@@ -531,7 +528,7 @@ examples = [
|
|
531 |
[
|
532 |
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
533 |
"pl",
|
534 |
-
"examples/
|
535 |
None,
|
536 |
False,
|
537 |
False,
|
@@ -541,7 +538,7 @@ examples = [
|
|
541 |
[
|
542 |
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
543 |
"it",
|
544 |
-
"examples/
|
545 |
None,
|
546 |
False,
|
547 |
False,
|
@@ -551,7 +548,7 @@ examples = [
|
|
551 |
[
|
552 |
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
553 |
"tr",
|
554 |
-
"examples/
|
555 |
None,
|
556 |
False,
|
557 |
False,
|
@@ -561,7 +558,7 @@ examples = [
|
|
561 |
[
|
562 |
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
563 |
"ru",
|
564 |
-
"examples/
|
565 |
None,
|
566 |
False,
|
567 |
False,
|
@@ -571,7 +568,7 @@ examples = [
|
|
571 |
[
|
572 |
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
573 |
"nl",
|
574 |
-
"examples/
|
575 |
None,
|
576 |
False,
|
577 |
False,
|
@@ -581,7 +578,7 @@ examples = [
|
|
581 |
[
|
582 |
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
583 |
"cs",
|
584 |
-
"examples/
|
585 |
None,
|
586 |
False,
|
587 |
False,
|
@@ -591,7 +588,7 @@ examples = [
|
|
591 |
[
|
592 |
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
593 |
"zh-cn",
|
594 |
-
"examples/
|
595 |
None,
|
596 |
False,
|
597 |
False,
|
@@ -601,7 +598,7 @@ examples = [
|
|
601 |
[
|
602 |
"かつて 六歳のとき、素晴らしい絵を見ました",
|
603 |
"ja",
|
604 |
-
"examples/
|
605 |
None,
|
606 |
False,
|
607 |
True,
|
@@ -611,17 +608,17 @@ examples = [
|
|
611 |
[
|
612 |
"한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
|
613 |
"ko",
|
614 |
-
"examples/
|
615 |
None,
|
616 |
False,
|
617 |
True,
|
618 |
False,
|
619 |
True,
|
620 |
],
|
621 |
-
|
622 |
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
623 |
"hu",
|
624 |
-
"examples/
|
625 |
None,
|
626 |
False,
|
627 |
True,
|
@@ -655,7 +652,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
655 |
input_text_gr = gr.Textbox(
|
656 |
label="Text Prompt",
|
657 |
info="One or two sentences at a time is better. Up to 200 text characters.",
|
658 |
-
value="Tere, olen sinu
|
659 |
)
|
660 |
language_gr = gr.Dropdown(
|
661 |
label="Language",
|
@@ -680,18 +677,18 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
680 |
"hu",
|
681 |
"hi"
|
682 |
],
|
683 |
-
|
684 |
value="et",
|
685 |
)
|
686 |
ref_gr = gr.Audio(
|
687 |
label="Reference Audio",
|
688 |
-
info="Click on the ✎ button to upload your own target speaker audio",
|
689 |
type="filepath",
|
690 |
-
value="examples/
|
691 |
)
|
692 |
mic_gr = gr.Audio(
|
693 |
-
|
694 |
-
info="Use your microphone to record audio",
|
695 |
type="filepath",
|
696 |
label="Use Microphone for Reference",
|
697 |
)
|
@@ -720,7 +717,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
720 |
|
721 |
|
722 |
with gr.Column():
|
723 |
-
video_gr = gr.Video(label="Waveform Visual")
|
724 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
725 |
out_text_gr = gr.Text(label="Metrics")
|
726 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
@@ -729,11 +725,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
729 |
gr.Examples(examples,
|
730 |
label="Examples",
|
731 |
inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
|
732 |
-
outputs=[
|
733 |
fn=predict,
|
734 |
cache_examples=False,)
|
735 |
|
736 |
-
tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[
|
737 |
|
738 |
if __name__ == "__main__":
|
739 |
demo.queue()
|
|
|
416 |
None,
|
417 |
)
|
418 |
return (
|
|
|
|
|
|
|
419 |
"output.wav",
|
420 |
metrics_text,
|
421 |
speaker_wav,
|
|
|
468 |
[
|
469 |
"Once when I was six years old I saw a magnificent picture",
|
470 |
"en",
|
471 |
+
"examples/female.wav",
|
472 |
None,
|
473 |
False,
|
474 |
False,
|
|
|
478 |
[
|
479 |
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
480 |
"fr",
|
481 |
+
"examples/female.wav",
|
482 |
None,
|
483 |
False,
|
484 |
False,
|
|
|
488 |
[
|
489 |
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
490 |
"de",
|
491 |
+
"examples/female.wav",
|
492 |
None,
|
493 |
False,
|
494 |
False,
|
|
|
498 |
[
|
499 |
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
500 |
"es",
|
501 |
+
"examples/female.wav",
|
502 |
None,
|
503 |
False,
|
504 |
False,
|
|
|
508 |
[
|
509 |
"Kunagi, kui olin kuueaastane, nägin ma ühte imelist pilti",
|
510 |
"et",
|
511 |
+
"examples/female.wav",
|
512 |
None,
|
513 |
False,
|
514 |
False,
|
|
|
518 |
[
|
519 |
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
520 |
"pt",
|
521 |
+
"examples/female.wav",
|
522 |
None,
|
523 |
False,
|
524 |
False,
|
|
|
528 |
[
|
529 |
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
530 |
"pl",
|
531 |
+
"examples/female.wav",
|
532 |
None,
|
533 |
False,
|
534 |
False,
|
|
|
538 |
[
|
539 |
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
540 |
"it",
|
541 |
+
"examples/female.wav",
|
542 |
None,
|
543 |
False,
|
544 |
False,
|
|
|
548 |
[
|
549 |
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
550 |
"tr",
|
551 |
+
"examples/male.wav",
|
552 |
None,
|
553 |
False,
|
554 |
False,
|
|
|
558 |
[
|
559 |
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
560 |
"ru",
|
561 |
+
"examples/female.wav",
|
562 |
None,
|
563 |
False,
|
564 |
False,
|
|
|
568 |
[
|
569 |
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
570 |
"nl",
|
571 |
+
"examples/male.wav",
|
572 |
None,
|
573 |
False,
|
574 |
False,
|
|
|
578 |
[
|
579 |
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
580 |
"cs",
|
581 |
+
"examples/female.wav",
|
582 |
None,
|
583 |
False,
|
584 |
False,
|
|
|
588 |
[
|
589 |
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
590 |
"zh-cn",
|
591 |
+
"examples/male.wav",
|
592 |
None,
|
593 |
False,
|
594 |
False,
|
|
|
598 |
[
|
599 |
"かつて 六歳のとき、素晴らしい絵を見ました",
|
600 |
"ja",
|
601 |
+
"examples/female.wav",
|
602 |
None,
|
603 |
False,
|
604 |
True,
|
|
|
608 |
[
|
609 |
"한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
|
610 |
"ko",
|
611 |
+
"examples/male.wav",
|
612 |
None,
|
613 |
False,
|
614 |
True,
|
615 |
False,
|
616 |
True,
|
617 |
],
|
618 |
+
[
|
619 |
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
620 |
"hu",
|
621 |
+
"examples/male.wav",
|
622 |
None,
|
623 |
False,
|
624 |
True,
|
|
|
652 |
input_text_gr = gr.Textbox(
|
653 |
label="Text Prompt",
|
654 |
info="One or two sentences at a time is better. Up to 200 text characters.",
|
655 |
+
value="Tere, olen sinu hääle kloon. Ürita mulle lindistada võimalikult hea kvaliteediga klipp, et oskaksin su kõnet paremini jäljendada.",
|
656 |
)
|
657 |
language_gr = gr.Dropdown(
|
658 |
label="Language",
|
|
|
677 |
"hu",
|
678 |
"hi"
|
679 |
],
|
680 |
+
multiselect=False,
|
681 |
value="et",
|
682 |
)
|
683 |
ref_gr = gr.Audio(
|
684 |
label="Reference Audio",
|
685 |
+
#info="Click on the ✎ button to upload your own target speaker audio",
|
686 |
type="filepath",
|
687 |
+
value="examples/female.wav",
|
688 |
)
|
689 |
mic_gr = gr.Audio(
|
690 |
+
sources="microphone",
|
691 |
+
#info="Use your microphone to record audio",
|
692 |
type="filepath",
|
693 |
label="Use Microphone for Reference",
|
694 |
)
|
|
|
717 |
|
718 |
|
719 |
with gr.Column():
|
|
|
720 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
721 |
out_text_gr = gr.Text(label="Metrics")
|
722 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
|
|
725 |
gr.Examples(examples,
|
726 |
label="Examples",
|
727 |
inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
|
728 |
+
outputs=[audio_gr, out_text_gr, ref_audio_gr],
|
729 |
fn=predict,
|
730 |
cache_examples=False,)
|
731 |
|
732 |
+
tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[audio_gr, out_text_gr, ref_audio_gr])
|
733 |
|
734 |
if __name__ == "__main__":
|
735 |
demo.queue()
|
examples/female.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
|
3 |
+
size 1002030
|
examples/male.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:937c74afad004937e00d1687c68e02210e0c5d93ac072a7c8aeb9ab573517bb1
|
3 |
+
size 762126
|
requirements.txt
CHANGED
@@ -65,9 +65,9 @@ spacy[ja]>=3,<3.8
|
|
65 |
tokenizers==0.20.1
|
66 |
#deps for gradio
|
67 |
huggingface_hub
|
68 |
-
gradio==
|
69 |
-
pydantic==
|
70 |
-
python-multipart==0.0.
|
71 |
typing-extensions>=4.8.0
|
72 |
langid
|
73 |
deepspeed==0.14.5
|
|
|
65 |
tokenizers==0.20.1
|
66 |
#deps for gradio
|
67 |
huggingface_hub
|
68 |
+
gradio==5.41.0
|
69 |
+
pydantic==2.11.7
|
70 |
+
python-multipart==0.0.20
|
71 |
typing-extensions>=4.8.0
|
72 |
langid
|
73 |
deepspeed==0.14.5
|