Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

App Files Files Community

fffiloni committed on Aug 22, 2023

Commit

cc07fe9

1 Parent(s): ce514b9

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -10

app.py CHANGED Viewed

@@ -13,15 +13,15 @@ model_ids = [
 for model_id in model_ids:
     model_name = model_id.split('/')[-1]
     snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
-"""
-#from TTS.tts.configs.bark_config import BarkConfig
-#from TTS.tts.models.bark import Bark
-#os.environ['CUDA_VISIBLE_DEVICES'] = '1'
-#config = BarkConfig()
-#model = Bark.init_from_config(config)
-#model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
 from TTS.api import TTS
 tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
@@ -87,25 +87,34 @@ def infer(prompt, input_wav_file):
     # Print the contents
     for item in contents:
-        print(item)
-    return "output.wav", f"bark_voices/{file_name}/{contents[1]}"
 css = """
 #col-container {max-width: 580px; margin-left: auto; margin-right: auto;}
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML("""
         <h1 style="text-align: center;">Instant Voice Cloning</h1>
         <p style="text-align: center;">
         Clone any voice in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TSS + Bark</a> demo ! <br />
         Upload a clean 20 seconds WAV file of the voice you want to clone, <br />
         type your text-to-speech prompt and hit submit ! <br />
         </p>
         """)
         prompt = gr.Textbox(
@@ -124,6 +133,10 @@ with gr.Blocks(css=css) as demo:
         cloned_out = gr.Audio(
             label="Text to speech output"
         )
         npz_file = gr.File(
             label=".npz file"
@@ -137,6 +150,7 @@ with gr.Blocks(css=css) as demo:
         ],
         outputs = [
             cloned_out,
             npz_file
         ]
     )

 for model_id in model_ids:
     model_name = model_id.split('/')[-1]
     snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
+from TTS.tts.configs.bark_config import BarkConfig
+from TTS.tts.models.bark import Bark
+#os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+config = BarkConfig()
+model = Bark.init_from_config(config)
+model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
+"""
 from TTS.api import TTS
 tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
     # Print the contents
     for item in contents:
+        print(item)
+    tts_video = gr.make_waveform(audio="output.wav")
+    return "output.wav", tts_video, f"bark_voices/{file_name}/{contents[1]}"
 css = """
 #col-container {max-width: 580px; margin-left: auto; margin-right: auto;}
+img[src*='#center'] {
+    display: block;
+    margin: auto;
+}
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("""
         <h1 style="text-align: center;">Instant Voice Cloning</h1>
         <p style="text-align: center;">
         Clone any voice in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TSS + Bark</a> demo ! <br />
         Upload a clean 20 seconds WAV file of the voice you want to clone, <br />
         type your text-to-speech prompt and hit submit ! <br />
         </p>
+        [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center)](https://huggingface.co/spaces/fffiloni/instant-TTS-Bark-cloning?duplicate=true)
         """)
         prompt = gr.Textbox(
         cloned_out = gr.Audio(
             label="Text to speech output"
         )
+        video_out = gr.Video(
+            label = "Waveform video"
+        )
         npz_file = gr.File(
             label=".npz file"
         ],
         outputs = [
             cloned_out,
+            video_out,
             npz_file
         ]
     )