Clint Adams committed on
Commit
b618cc5
1 Parent(s): 6c33c6b

change voice input

Browse files
Files changed (5) hide show
  1. README.md +2 -3
  2. app.py +34 -33
  3. examples/1.npz +3 -0
  4. examples/2.npz +3 -0
  5. requirements.txt +4 -5
README.md CHANGED
@@ -1,11 +1,10 @@
1
  ---
2
- title: Bark
3
  emoji: 🐶
4
  colorFrom: gray
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 3.27.0
8
- python_version: 3.8.15
9
  app_file: app.py
10
  pinned: false
11
  license: cc-by-nc-4.0
 
1
  ---
2
+ title: Bark (with user-supplied voices)
3
  emoji: 🐶
4
  colorFrom: gray
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.26.0
 
8
  app_file: app.py
9
  pinned: false
10
  license: cc-by-nc-4.0
app.py CHANGED
@@ -3,39 +3,46 @@ import gradio as gr
3
  from bark import SAMPLE_RATE, generate_audio, preload_models
4
  from bark.generation import SUPPORTED_LANGS
5
  from share_btn import community_icon_html, loading_icon_html, share_js
 
 
6
 
7
  DEBUG_MODE = False
8
 
9
  if not DEBUG_MODE:
10
  _ = preload_models()
11
 
12
- AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
13
- PROMPT_LOOKUP = {}
14
- for _, lang in SUPPORTED_LANGS:
15
- for n in range(10):
16
- label = f"Speaker {n} ({lang})"
17
- AVAILABLE_PROMPTS.append(label)
18
- PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
19
- PROMPT_LOOKUP["Unconditional"] = None
20
- PROMPT_LOOKUP["Announcer"] = "announcer"
21
 
22
- default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
23
-
24
- title = "# 🐶 Bark</div>"
25
 
26
  description = """
27
- <div>
28
- <a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
29
- <a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
30
- <a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
31
- </div>
32
- Bark is a universal text-to-audio model created by [Suno](www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
33
- Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
34
- This demo should be used for research purposes only. Commercial use is strictly prohibited. \
35
- The model output is not censored and the authors do not endorse the opinions in the generated content. \
 
 
 
36
  Use at your own risk.
37
  """
38
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  article = """
40
 
41
  ## 🌎 Foreign Language
@@ -108,17 +115,12 @@ Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commerc
108
  """
109
 
110
  examples = [
111
- ["Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!",
112
- "Unconditional"], # , 0.7, 0.7],
113
- ["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
114
- "Speaker 1 (en)"], # , 0.7, 0.7],
115
- ["Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
116
- "Speaker 0 (es)"], # , 0.7, 0.7],
117
  ]
118
 
119
-
120
  def gen_tts(text, history_prompt): # , temp_semantic, temp_waveform):
121
- history_prompt = PROMPT_LOOKUP[history_prompt]
122
  if DEBUG_MODE:
123
  audio_arr = np.zeros(SAMPLE_RATE)
124
  else:
@@ -171,9 +173,8 @@ with gr.Blocks(css=css) as block:
171
  with gr.Column():
172
  input_text = gr.Textbox(
173
  label="Input Text", lines=2, value=default_text, elem_id="input_text")
174
- options = gr.Dropdown(
175
- AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
176
- run_button = gr.Button(text="Generate Audio", type="button")
177
  with gr.Column():
178
  audio_out = gr.Audio(label="Generated Audio",
179
  type="numpy", elem_id="audio_out")
@@ -183,7 +184,7 @@ with gr.Blocks(css=css) as block:
183
  loading_icon = gr.HTML(loading_icon_html)
184
  share_button = gr.Button(
185
  "Share to community", elem_id="share-btn")
186
- share_button.click(None, [], [], _js=share_js)
187
  inputs = [input_text, options]
188
  outputs = [audio_out]
189
  gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
 
3
  from bark import SAMPLE_RATE, generate_audio, preload_models
4
  from bark.generation import SUPPORTED_LANGS
5
  from share_btn import community_icon_html, loading_icon_html, share_js
6
+ import spaces
7
+ import torch
8
 
9
  DEBUG_MODE = False
10
 
11
  if not DEBUG_MODE:
12
  _ = preload_models()
13
 
14
+ default_text = "This is a fork of Suno's Bark Spaces that allows you to supply your own voice."
 
 
 
 
 
 
 
 
15
 
16
+ title = "# 🐶 Bark (with user-supplied voices)</div>"
 
 
17
 
18
  description = """
19
+ This is a fork of Suno's [Bark Space](https://huggingface.co/spaces/suno/bark)
20
+ that allows you to supply your own voice.
21
+
22
+ You can use [GitMyLo's bark-voice-cloning Space](https://huggingface.co/spaces/GitMylo/bark-voice-cloning)
23
+ to clone your own voice, or provide a voice file from
24
+ [this Bark speaker directory](https://rsxdalv.github.io/bark-speaker-directory/),
25
+ or use an alternate method to generate the same .npz format with semantic, coarse, and fine histories.
26
+
27
+ Bark is a universal text-to-audio model created by [Suno](www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark).
28
+ Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects.
29
+ This demo should be used for research purposes only. Commercial use is strictly prohibited.
30
+ The model output is not censored and the authors do not endorse the opinions in the generated content.
31
  Use at your own risk.
32
  """
33
 
34
+ if torch.cuda.is_available():
35
+ device = 'cuda'
36
+ device_dtype = torch.float16
37
+ xlp_kwargs['variant'] = 'fp16'
38
+ else:
39
+ device = 'cpu'
40
+ device_dtype = torch.float32
41
+ description+='''
42
+
43
+ This Space appears to be running on a CPU; it may take over 30 minutes to get results. You may [duplicate this space](https://huggingface.co/spaces/clinteroni/bark-with-custom-voice?duplicate=true) and pay for an upgraded runtime instead.
44
+ '''
45
+
46
  article = """
47
 
48
  ## 🌎 Foreign Language
 
115
  """
116
 
117
  examples = [
118
+ ["I enjoy reading murder mysteries, long walks on the beach, sculpting mashed potatoes into the shape of a homicidal snowman, and telling you what's up.", 'examples/1.npz'],
119
+ ['The space clown descended the long staircase and invaded New Jersey.', 'examples/2.npz'],
 
 
 
 
120
  ]
121
 
122
+ @spaces.GPU
123
  def gen_tts(text, history_prompt): # , temp_semantic, temp_waveform):
 
124
  if DEBUG_MODE:
125
  audio_arr = np.zeros(SAMPLE_RATE)
126
  else:
 
173
  with gr.Column():
174
  input_text = gr.Textbox(
175
  label="Input Text", lines=2, value=default_text, elem_id="input_text")
176
+ options = gr.File(elem_id="speaker_option")
177
+ run_button = gr.Button("Generate Audio")
 
178
  with gr.Column():
179
  audio_out = gr.Audio(label="Generated Audio",
180
  type="numpy", elem_id="audio_out")
 
184
  loading_icon = gr.HTML(loading_icon_html)
185
  share_button = gr.Button(
186
  "Share to community", elem_id="share-btn")
187
+ share_button.click(None, [], [], js=share_js)
188
  inputs = [input_text, options]
189
  outputs = [audio_out]
190
  gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
examples/1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b16df700e5bc90c37eb502d769e6a7aa5cd7b8f4a59f7c46b3c39fdb85c02e2
3
+ size 77588
examples/2.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957c59a4490d1384020a2e2277a59a1d2b01b954c78ec4572520b04722512a27
3
+ size 90388
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
- git+https://github.com/suno-ai/bark.git
2
- https://download.pytorch.org/whl/nightly/pytorch_triton-2.1.0%2B46672772b4-cp38-cp38-linux_x86_64.whl
3
- https://download.pytorch.org/whl/nightly/cu117/torch-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
4
- https://download.pytorch.org/whl/nightly/cu117/torchvision-0.16.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
5
- https://download.pytorch.org/whl/nightly/cu117/torchaudio-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
 
1
+ bark
2
+ torch
3
+ torchvision
4
+ torchaudio