bark-with-custom-voice

Running

App Files Files Community

Clint Adams committed on Apr 21

Commit

b618cc5

•

1 Parent(s): 6c33c6b

change voice input

Browse files

Files changed (5) hide show

README.md +2 -3
app.py +34 -33
examples/1.npz +3 -0
examples/2.npz +3 -0
requirements.txt +4 -5

README.md CHANGED Viewed

@@ -1,11 +1,10 @@
 ---
-title: Bark
 emoji: 🐶
 colorFrom: gray
 colorTo: blue
 sdk: gradio
-sdk_version: 3.27.0
-python_version: 3.8.15
 app_file: app.py
 pinned: false
 license: cc-by-nc-4.0

 ---
+title: Bark (with user-supplied voices)
 emoji: 🐶
 colorFrom: gray
 colorTo: blue
 sdk: gradio
+sdk_version: 4.26.0
 app_file: app.py
 pinned: false
 license: cc-by-nc-4.0

app.py CHANGED Viewed

@@ -3,39 +3,46 @@ import gradio as gr
 from bark import SAMPLE_RATE, generate_audio, preload_models
 from bark.generation import SUPPORTED_LANGS
 from share_btn import community_icon_html, loading_icon_html, share_js
 DEBUG_MODE = False
 if not DEBUG_MODE:
     _ = preload_models()
-AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
-PROMPT_LOOKUP = {}
-for _, lang in SUPPORTED_LANGS:
-    for n in range(10):
-        label = f"Speaker {n} ({lang})"
-        AVAILABLE_PROMPTS.append(label)
-        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
-PROMPT_LOOKUP["Unconditional"] = None
-PROMPT_LOOKUP["Announcer"] = "announcer"
-default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
-title = "# 🐶 Bark</div>"
 description = """
-<div>
-<a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
-<a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
-<a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
-</div>
-Bark is a universal text-to-audio model created by [Suno](www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
-Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
-This demo should be used for research purposes only. Commercial use is strictly prohibited. \
-The model output is not censored and the authors do not endorse the opinions in the generated content. \
 Use at your own risk.
 """
 article = """
 ## 🌎 Foreign Language
@@ -108,17 +115,12 @@ Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commerc
 """
 examples = [
-    ["Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!",
-        "Unconditional"],  # , 0.7, 0.7],
-    ["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
-        "Speaker 1 (en)"],  # , 0.7, 0.7],
-    ["Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
-        "Speaker 0 (es)"],  # , 0.7, 0.7],
 ]
 def gen_tts(text, history_prompt):  # , temp_semantic, temp_waveform):
-    history_prompt = PROMPT_LOOKUP[history_prompt]
     if DEBUG_MODE:
         audio_arr = np.zeros(SAMPLE_RATE)
     else:
@@ -171,9 +173,8 @@ with gr.Blocks(css=css) as block:
         with gr.Column():
             input_text = gr.Textbox(
                 label="Input Text", lines=2, value=default_text, elem_id="input_text")
-            options = gr.Dropdown(
-                AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
-            run_button = gr.Button(text="Generate Audio", type="button")
         with gr.Column():
             audio_out = gr.Audio(label="Generated Audio",
                                  type="numpy", elem_id="audio_out")
@@ -183,7 +184,7 @@ with gr.Blocks(css=css) as block:
                     loading_icon = gr.HTML(loading_icon_html)
                     share_button = gr.Button(
                         "Share to community", elem_id="share-btn")
-                    share_button.click(None, [], [], _js=share_js)
     inputs = [input_text, options]
     outputs = [audio_out]
     gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,

 from bark import SAMPLE_RATE, generate_audio, preload_models
 from bark.generation import SUPPORTED_LANGS
 from share_btn import community_icon_html, loading_icon_html, share_js
+import spaces
+import torch
 DEBUG_MODE = False
 if not DEBUG_MODE:
     _ = preload_models()
+default_text = "This is a fork of Suno's Bark Spaces that allows you to supply your own voice."
+title = "# 🐶 Bark (with user-supplied voices)</div>"
 description = """
+This is a fork of Suno's [Bark Space](https://huggingface.co/spaces/suno/bark)
+that allows you to supply your own voice.
+You can use [GitMyLo's bark-voice-cloning Space](https://huggingface.co/spaces/GitMylo/bark-voice-cloning)
+to clone your own voice, or provide a voice file from
+[this Bark speaker directory](https://rsxdalv.github.io/bark-speaker-directory/),
+or use an alternate method to generate the same .npz format with semantic, coarse, and fine histories.
+Bark is a universal text-to-audio model created by [Suno](www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark).
+Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects.
+This demo should be used for research purposes only. Commercial use is strictly prohibited.
+The model output is not censored and the authors do not endorse the opinions in the generated content.
 Use at your own risk.
 """
+if torch.cuda.is_available():
+    device = 'cuda'
+    device_dtype = torch.float16
+    xlp_kwargs['variant'] = 'fp16'
+else:
+    device = 'cpu'
+    device_dtype = torch.float32
+    description+='''
+This Space appears to be running on a CPU; it may take over 30 minutes to get results.  You may [duplicate this space](https://huggingface.co/spaces/clinteroni/bark-with-custom-voice?duplicate=true) and pay for an upgraded runtime instead.
+    '''
 article = """
 ## 🌎 Foreign Language
 """
 examples = [
+    ["I enjoy reading murder mysteries, long walks on the beach, sculpting mashed potatoes into the shape of a homicidal snowman, and telling you what's up.", 'examples/1.npz'],
+    ['The space clown descended the long staircase and invaded New Jersey.', 'examples/2.npz'],
 ]
+@spaces.GPU
 def gen_tts(text, history_prompt):  # , temp_semantic, temp_waveform):
     if DEBUG_MODE:
         audio_arr = np.zeros(SAMPLE_RATE)
     else:
         with gr.Column():
             input_text = gr.Textbox(
                 label="Input Text", lines=2, value=default_text, elem_id="input_text")
+            options = gr.File(elem_id="speaker_option")
+            run_button = gr.Button("Generate Audio")
         with gr.Column():
             audio_out = gr.Audio(label="Generated Audio",
                                  type="numpy", elem_id="audio_out")
                     loading_icon = gr.HTML(loading_icon_html)
                     share_button = gr.Button(
                         "Share to community", elem_id="share-btn")
+                    share_button.click(None, [], [], js=share_js)
     inputs = [input_text, options]
     outputs = [audio_out]
     gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,

examples/1.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b16df700e5bc90c37eb502d769e6a7aa5cd7b8f4a59f7c46b3c39fdb85c02e2
+size 77588

examples/2.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:957c59a4490d1384020a2e2277a59a1d2b01b954c78ec4572520b04722512a27
+size 90388

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
-git+https://github.com/suno-ai/bark.git
-https://download.pytorch.org/whl/nightly/pytorch_triton-2.1.0%2B46672772b4-cp38-cp38-linux_x86_64.whl
-https://download.pytorch.org/whl/nightly/cu117/torch-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
-https://download.pytorch.org/whl/nightly/cu117/torchvision-0.16.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
-https://download.pytorch.org/whl/nightly/cu117/torchaudio-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl

+bark
+torch
+torchvision
+torchaudio