Rasmus Lellep committed on
Commit
b137cc2
·
1 Parent(s): 1e4fe3c

working new gradio version, added more example clips

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. app.py +25 -29
  3. examples/female.wav +3 -0
  4. examples/male.wav +3 -0
  5. requirements.txt +3 -3
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🦀
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 3.50.2
8
  python_version: 3.11
9
  app_file: app.py
10
  pinned: false
 
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.41.0
8
  python_version: 3.11
9
  app_file: app.py
10
  pinned: false
app.py CHANGED
@@ -416,9 +416,6 @@ def predict(
416
  None,
417
  )
418
  return (
419
- gr.make_waveform(
420
- audio="output.wav",
421
- ),
422
  "output.wav",
423
  metrics_text,
424
  speaker_wav,
@@ -471,7 +468,7 @@ examples = [
471
  [
472
  "Once when I was six years old I saw a magnificent picture",
473
  "en",
474
- "examples/LJ001-0030.wav",
475
  None,
476
  False,
477
  False,
@@ -481,7 +478,7 @@ examples = [
481
  [
482
  "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
483
  "fr",
484
- "examples/LJ001-0030.wav",
485
  None,
486
  False,
487
  False,
@@ -491,7 +488,7 @@ examples = [
491
  [
492
  "Als ich sechs war, sah ich einmal ein wunderbares Bild",
493
  "de",
494
- "examples/LJ001-0030.wav",
495
  None,
496
  False,
497
  False,
@@ -501,7 +498,7 @@ examples = [
501
  [
502
  "Cuando tenía seis años, vi una vez una imagen magnífica",
503
  "es",
504
- "examples/LJ001-0030.wav",
505
  None,
506
  False,
507
  False,
@@ -511,7 +508,7 @@ examples = [
511
  [
512
  "Kunagi, kui olin kuueaastane, nägin ma ühte imelist pilti",
513
  "et",
514
- "examples/LJ001-0030.wav",
515
  None,
516
  False,
517
  False,
@@ -521,7 +518,7 @@ examples = [
521
  [
522
  "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
523
  "pt",
524
- "examples/LJ001-0030.wav",
525
  None,
526
  False,
527
  False,
@@ -531,7 +528,7 @@ examples = [
531
  [
532
  "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
533
  "pl",
534
- "examples/LJ001-0030.wav",
535
  None,
536
  False,
537
  False,
@@ -541,7 +538,7 @@ examples = [
541
  [
542
  "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
543
  "it",
544
- "examples/LJ001-0030.wav",
545
  None,
546
  False,
547
  False,
@@ -551,7 +548,7 @@ examples = [
551
  [
552
  "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
553
  "tr",
554
- "examples/LJ001-0030.wav",
555
  None,
556
  False,
557
  False,
@@ -561,7 +558,7 @@ examples = [
561
  [
562
  "Когда мне было шесть лет, я увидел однажды удивительную картинку",
563
  "ru",
564
- "examples/LJ001-0030.wav",
565
  None,
566
  False,
567
  False,
@@ -571,7 +568,7 @@ examples = [
571
  [
572
  "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
573
  "nl",
574
- "examples/LJ001-0030.wav",
575
  None,
576
  False,
577
  False,
@@ -581,7 +578,7 @@ examples = [
581
  [
582
  "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
583
  "cs",
584
- "examples/LJ001-0030.wav",
585
  None,
586
  False,
587
  False,
@@ -591,7 +588,7 @@ examples = [
591
  [
592
  "当我还只有六岁的时候, 看到了一副精彩的插画",
593
  "zh-cn",
594
- "examples/LJ001-0030.wav",
595
  None,
596
  False,
597
  False,
@@ -601,7 +598,7 @@ examples = [
601
  [
602
  "かつて 六歳のとき、素晴らしい絵を見ました",
603
  "ja",
604
- "examples/LJ001-0030.wav",
605
  None,
606
  False,
607
  True,
@@ -611,17 +608,17 @@ examples = [
611
  [
612
  "한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
613
  "ko",
614
- "examples/LJ001-0030.wav",
615
  None,
616
  False,
617
  True,
618
  False,
619
  True,
620
  ],
621
- [
622
  "Egyszer hat éves koromban láttam egy csodálatos képet",
623
  "hu",
624
- "examples/LJ001-0030.wav",
625
  None,
626
  False,
627
  True,
@@ -655,7 +652,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
655
  input_text_gr = gr.Textbox(
656
  label="Text Prompt",
657
  info="One or two sentences at a time is better. Up to 200 text characters.",
658
- value="Tere, olen sinu uus häälekloon. Ürita mulle lindistada võimalikult hea kvaliteediga klipp, et oskaksin su häält paremini jäljendada.",
659
  )
660
  language_gr = gr.Dropdown(
661
  label="Language",
@@ -680,18 +677,18 @@ with gr.Blocks(analytics_enabled=False) as demo:
680
  "hu",
681
  "hi"
682
  ],
683
- max_choices=1,
684
  value="et",
685
  )
686
  ref_gr = gr.Audio(
687
  label="Reference Audio",
688
- info="Click on the ✎ button to upload your own target speaker audio",
689
  type="filepath",
690
- value="examples/LJ001-0030.wav",
691
  )
692
  mic_gr = gr.Audio(
693
- source="microphone",
694
- info="Use your microphone to record audio",
695
  type="filepath",
696
  label="Use Microphone for Reference",
697
  )
@@ -720,7 +717,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
720
 
721
 
722
  with gr.Column():
723
- video_gr = gr.Video(label="Waveform Visual")
724
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
725
  out_text_gr = gr.Text(label="Metrics")
726
  ref_audio_gr = gr.Audio(label="Reference Audio Used")
@@ -729,11 +725,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
729
  gr.Examples(examples,
730
  label="Examples",
731
  inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
732
- outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
733
  fn=predict,
734
  cache_examples=False,)
735
 
736
- tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
737
 
738
  if __name__ == "__main__":
739
  demo.queue()
 
416
  None,
417
  )
418
  return (
 
 
 
419
  "output.wav",
420
  metrics_text,
421
  speaker_wav,
 
468
  [
469
  "Once when I was six years old I saw a magnificent picture",
470
  "en",
471
+ "examples/female.wav",
472
  None,
473
  False,
474
  False,
 
478
  [
479
  "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
480
  "fr",
481
+ "examples/female.wav",
482
  None,
483
  False,
484
  False,
 
488
  [
489
  "Als ich sechs war, sah ich einmal ein wunderbares Bild",
490
  "de",
491
+ "examples/female.wav",
492
  None,
493
  False,
494
  False,
 
498
  [
499
  "Cuando tenía seis años, vi una vez una imagen magnífica",
500
  "es",
501
+ "examples/female.wav",
502
  None,
503
  False,
504
  False,
 
508
  [
509
  "Kunagi, kui olin kuueaastane, nägin ma ühte imelist pilti",
510
  "et",
511
+ "examples/female.wav",
512
  None,
513
  False,
514
  False,
 
518
  [
519
  "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
520
  "pt",
521
+ "examples/female.wav",
522
  None,
523
  False,
524
  False,
 
528
  [
529
  "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
530
  "pl",
531
+ "examples/female.wav",
532
  None,
533
  False,
534
  False,
 
538
  [
539
  "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
540
  "it",
541
+ "examples/female.wav",
542
  None,
543
  False,
544
  False,
 
548
  [
549
  "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
550
  "tr",
551
+ "examples/male.wav",
552
  None,
553
  False,
554
  False,
 
558
  [
559
  "Когда мне было шесть лет, я увидел однажды удивительную картинку",
560
  "ru",
561
+ "examples/female.wav",
562
  None,
563
  False,
564
  False,
 
568
  [
569
  "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
570
  "nl",
571
+ "examples/male.wav",
572
  None,
573
  False,
574
  False,
 
578
  [
579
  "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
580
  "cs",
581
+ "examples/female.wav",
582
  None,
583
  False,
584
  False,
 
588
  [
589
  "当我还只有六岁的时候, 看到了一副精彩的插画",
590
  "zh-cn",
591
+ "examples/male.wav",
592
  None,
593
  False,
594
  False,
 
598
  [
599
  "かつて 六歳のとき、素晴らしい絵を見ました",
600
  "ja",
601
+ "examples/female.wav",
602
  None,
603
  False,
604
  True,
 
608
  [
609
  "한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
610
  "ko",
611
+ "examples/male.wav",
612
  None,
613
  False,
614
  True,
615
  False,
616
  True,
617
  ],
618
+ [
619
  "Egyszer hat éves koromban láttam egy csodálatos képet",
620
  "hu",
621
+ "examples/male.wav",
622
  None,
623
  False,
624
  True,
 
652
  input_text_gr = gr.Textbox(
653
  label="Text Prompt",
654
  info="One or two sentences at a time is better. Up to 200 text characters.",
655
+ value="Tere, olen sinu hääle kloon. Ürita mulle lindistada võimalikult hea kvaliteediga klipp, et oskaksin su kõnet paremini jäljendada.",
656
  )
657
  language_gr = gr.Dropdown(
658
  label="Language",
 
677
  "hu",
678
  "hi"
679
  ],
680
+ multiselect=False,
681
  value="et",
682
  )
683
  ref_gr = gr.Audio(
684
  label="Reference Audio",
685
+ #info="Click on the ✎ button to upload your own target speaker audio",
686
  type="filepath",
687
+ value="examples/female.wav",
688
  )
689
  mic_gr = gr.Audio(
690
+ sources="microphone",
691
+ #info="Use your microphone to record audio",
692
  type="filepath",
693
  label="Use Microphone for Reference",
694
  )
 
717
 
718
 
719
  with gr.Column():
 
720
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
721
  out_text_gr = gr.Text(label="Metrics")
722
  ref_audio_gr = gr.Audio(label="Reference Audio Used")
 
725
  gr.Examples(examples,
726
  label="Examples",
727
  inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
728
+ outputs=[audio_gr, out_text_gr, ref_audio_gr],
729
  fn=predict,
730
  cache_examples=False,)
731
 
732
+ tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[audio_gr, out_text_gr, ref_audio_gr])
733
 
734
  if __name__ == "__main__":
735
  demo.queue()
examples/female.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
3
+ size 1002030
examples/male.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937c74afad004937e00d1687c68e02210e0c5d93ac072a7c8aeb9ab573517bb1
3
+ size 762126
requirements.txt CHANGED
@@ -65,9 +65,9 @@ spacy[ja]>=3,<3.8
65
  tokenizers==0.20.1
66
  #deps for gradio
67
  huggingface_hub
68
- gradio==3.50.2
69
- pydantic==1.10.13
70
- python-multipart==0.0.6
71
  typing-extensions>=4.8.0
72
  langid
73
  deepspeed==0.14.5
 
65
  tokenizers==0.20.1
66
  #deps for gradio
67
  huggingface_hub
68
+ gradio==5.41.0
69
+ pydantic==2.11.7
70
+ python-multipart==0.0.20
71
  typing-extensions>=4.8.0
72
  langid
73
  deepspeed==0.14.5