Spaces:
Runtime error
Runtime error
add few examples and share to community button (#7)
Browse files- add few examples and share to community button (ebf12568b1845b59385e6653033b48efe332ea62)
Co-authored-by: Radamés Ajna <[email protected]>
- app.py +62 -26
- share_btn.py +34 -20
app.py
CHANGED
|
@@ -30,7 +30,7 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
|
|
| 30 |
# waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
|
| 31 |
if(len(waveform) == 1):
|
| 32 |
waveform = waveform[0]
|
| 33 |
-
return waveform
|
| 34 |
|
| 35 |
# iface = gr.Interface(fn=text2audio, inputs=[
|
| 36 |
# gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
|
|
@@ -42,17 +42,22 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
|
|
| 42 |
# )
|
| 43 |
# iface.launch(share=True)
|
| 44 |
|
|
|
|
| 45 |
css = """
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
.gradio-container {
|
| 47 |
font-family: 'IBM Plex Sans', sans-serif;
|
| 48 |
}
|
| 49 |
.gr-button {
|
| 50 |
color: white;
|
| 51 |
-
border-color:
|
| 52 |
-
background:
|
| 53 |
}
|
| 54 |
input[type='range'] {
|
| 55 |
-
accent-color:
|
| 56 |
}
|
| 57 |
.dark input[type='range'] {
|
| 58 |
accent-color: #dfdfdf;
|
|
@@ -98,7 +103,6 @@ css = """
|
|
| 98 |
border-radius: 14px !important;
|
| 99 |
}
|
| 100 |
#advanced-options {
|
| 101 |
-
display: none;
|
| 102 |
margin-bottom: 20px;
|
| 103 |
}
|
| 104 |
.footer {
|
|
@@ -125,6 +129,12 @@ css = """
|
|
| 125 |
font-weight: bold;
|
| 126 |
font-size: 115%;
|
| 127 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
.animate-spin {
|
| 129 |
animation: spin 1s linear infinite;
|
| 130 |
}
|
|
@@ -154,16 +164,20 @@ css = """
|
|
| 154 |
#share-btn-container .wrap {
|
| 155 |
display: none !important;
|
| 156 |
}
|
| 157 |
-
|
| 158 |
.gr-form{
|
| 159 |
flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
|
| 160 |
}
|
| 161 |
#prompt-container{
|
| 162 |
gap: 0;
|
| 163 |
}
|
| 164 |
-
#
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
"""
|
| 168 |
iface = gr.Blocks(css=css)
|
| 169 |
|
|
@@ -188,17 +202,21 @@ with iface:
|
|
| 188 |
</p>
|
| 189 |
</div>
|
| 190 |
"""
|
| 191 |
-
)
|
| 192 |
gr.HTML("""
|
| 193 |
-
<
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
<
|
| 197 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
with gr.Group():
|
| 199 |
with gr.Box():
|
| 200 |
############# Input
|
| 201 |
-
textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.")
|
| 202 |
|
| 203 |
with gr.Accordion("Click to modify detailed configurations", open=False):
|
| 204 |
seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
|
|
@@ -207,7 +225,7 @@ with iface:
|
|
| 207 |
n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
|
| 208 |
############# Output
|
| 209 |
# outputs=gr.Audio(label="Output", type="numpy")
|
| 210 |
-
outputs=gr.Video(label="Output")
|
| 211 |
|
| 212 |
# with gr.Group(elem_id="container-advanced-btns"):
|
| 213 |
# # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
|
|
@@ -216,10 +234,17 @@ with iface:
|
|
| 216 |
# loading_icon = gr.HTML(loading_icon_html, visible=False)
|
| 217 |
# share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
|
| 218 |
# outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
|
| 219 |
-
|
| 220 |
btn = gr.Button("Submit").style(full_width=True)
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
gr.HTML('''
|
| 224 |
<div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
|
| 225 |
<p>Follow the latest update of AudioLDM on our<a href="https://github.com/haoheliu/AudioLDM" style="text-decoration: underline;" target="_blank"> Github repo</a>
|
|
@@ -229,17 +254,28 @@ with iface:
|
|
| 229 |
<br>
|
| 230 |
</div>
|
| 231 |
''')
|
| 232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
with gr.Accordion("Additional information", open=False):
|
| 234 |
gr.HTML(
|
| 235 |
-
|
| 236 |
<div class="acknowledgments">
|
| 237 |
<p> We build the model with data from <a href="http://research.google.com/audioset/">AudioSet</a>, <a href="https://freesound.org/">Freesound</a> and <a href="https://sound-effects.bbcrewind.co.uk/">BBC Sound Effect library</a>. We share this demo based on the <a href="https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/375954/Research.pdf">UK copyright exception</a> of data for academic research. </p>
|
| 238 |
</div>
|
| 239 |
"""
|
| 240 |
-
|
| 241 |
# <p>This demo is strictly for research demo purpose only. For commercial use please <a href="[email protected]">contact us</a>.</p>
|
| 242 |
-
|
| 243 |
-
iface.queue(concurrency_count
|
| 244 |
iface.launch(debug=True)
|
| 245 |
-
# iface.launch(debug=True, share=True)
|
|
|
|
| 30 |
# waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
|
| 31 |
if(len(waveform) == 1):
|
| 32 |
waveform = waveform[0]
|
| 33 |
+
return waveform
|
| 34 |
|
| 35 |
# iface = gr.Interface(fn=text2audio, inputs=[
|
| 36 |
# gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
|
|
|
|
| 42 |
# )
|
| 43 |
# iface.launch(share=True)
|
| 44 |
|
| 45 |
+
|
| 46 |
css = """
|
| 47 |
+
a {
|
| 48 |
+
color: inherit;
|
| 49 |
+
text-decoration: underline;
|
| 50 |
+
}
|
| 51 |
.gradio-container {
|
| 52 |
font-family: 'IBM Plex Sans', sans-serif;
|
| 53 |
}
|
| 54 |
.gr-button {
|
| 55 |
color: white;
|
| 56 |
+
border-color: #000000;
|
| 57 |
+
background: #000000;
|
| 58 |
}
|
| 59 |
input[type='range'] {
|
| 60 |
+
accent-color: #000000;
|
| 61 |
}
|
| 62 |
.dark input[type='range'] {
|
| 63 |
accent-color: #dfdfdf;
|
|
|
|
| 103 |
border-radius: 14px !important;
|
| 104 |
}
|
| 105 |
#advanced-options {
|
|
|
|
| 106 |
margin-bottom: 20px;
|
| 107 |
}
|
| 108 |
.footer {
|
|
|
|
| 129 |
font-weight: bold;
|
| 130 |
font-size: 115%;
|
| 131 |
}
|
| 132 |
+
#container-advanced-btns{
|
| 133 |
+
display: flex;
|
| 134 |
+
flex-wrap: wrap;
|
| 135 |
+
justify-content: space-between;
|
| 136 |
+
align-items: center;
|
| 137 |
+
}
|
| 138 |
.animate-spin {
|
| 139 |
animation: spin 1s linear infinite;
|
| 140 |
}
|
|
|
|
| 164 |
#share-btn-container .wrap {
|
| 165 |
display: none !important;
|
| 166 |
}
|
|
|
|
| 167 |
.gr-form{
|
| 168 |
flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
|
| 169 |
}
|
| 170 |
#prompt-container{
|
| 171 |
gap: 0;
|
| 172 |
}
|
| 173 |
+
#generated_id{
|
| 174 |
+
min-height: 700px
|
| 175 |
+
}
|
| 176 |
+
#setting_id{
|
| 177 |
+
margin-bottom: 12px;
|
| 178 |
+
text-align: center;
|
| 179 |
+
font-weight: 900;
|
| 180 |
+
}
|
| 181 |
"""
|
| 182 |
iface = gr.Blocks(css=css)
|
| 183 |
|
|
|
|
| 202 |
</p>
|
| 203 |
</div>
|
| 204 |
"""
|
| 205 |
+
)
|
| 206 |
gr.HTML("""
|
| 207 |
+
<h1 style="font-weight: 900; margin-bottom: 7px;">
|
| 208 |
+
AudioLDM: Text-to-Audio Generation with Latent Diffusion Models
|
| 209 |
+
</h1>
|
| 210 |
+
<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
|
| 211 |
+
<br/>
|
| 212 |
+
<a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation?duplicate=true">
|
| 213 |
+
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
| 214 |
+
<p/>
|
| 215 |
+
""")
|
| 216 |
with gr.Group():
|
| 217 |
with gr.Box():
|
| 218 |
############# Input
|
| 219 |
+
textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.", elem_id="prompt-in")
|
| 220 |
|
| 221 |
with gr.Accordion("Click to modify detailed configurations", open=False):
|
| 222 |
seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
|
|
|
|
| 225 |
n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
|
| 226 |
############# Output
|
| 227 |
# outputs=gr.Audio(label="Output", type="numpy")
|
| 228 |
+
outputs=gr.Video(label="Output", elem_id="output-video")
|
| 229 |
|
| 230 |
# with gr.Group(elem_id="container-advanced-btns"):
|
| 231 |
# # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
|
|
|
|
| 234 |
# loading_icon = gr.HTML(loading_icon_html, visible=False)
|
| 235 |
# share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
|
| 236 |
# outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
|
|
|
|
| 237 |
btn = gr.Button("Submit").style(full_width=True)
|
| 238 |
+
|
| 239 |
+
with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
|
| 240 |
+
community_icon = gr.HTML(community_icon_html)
|
| 241 |
+
loading_icon = gr.HTML(loading_icon_html)
|
| 242 |
+
share_button = gr.Button("Share to community", elem_id="share-btn")
|
| 243 |
+
|
| 244 |
+
btn.click(text2audio, inputs=[
|
| 245 |
+
textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs, share_group])
|
| 246 |
+
|
| 247 |
+
share_button.click(None, [], [], _js=share_js)
|
| 248 |
gr.HTML('''
|
| 249 |
<div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
|
| 250 |
<p>Follow the latest update of AudioLDM on our<a href="https://github.com/haoheliu/AudioLDM" style="text-decoration: underline;" target="_blank"> Github repo</a>
|
|
|
|
| 254 |
<br>
|
| 255 |
</div>
|
| 256 |
''')
|
| 257 |
+
gr.Examples([
|
| 258 |
+
["A hammer is hitting a wooden surface", 5, 2.5, 45, 3],
|
| 259 |
+
["Peaceful and calming ambient music with singing bowl and other instruments.", 5, 2.5, 45, 3],
|
| 260 |
+
["A man is speaking in a small room.", 5, 2.5, 45, 3],
|
| 261 |
+
["A female is speaking followed by footstep sound", 5, 2.5, 45, 3],
|
| 262 |
+
["Wooden table tapping sound followed by water pouring sound.", 5, 2.5, 45, 3],
|
| 263 |
+
],
|
| 264 |
+
fn=text2audio,
|
| 265 |
+
inputs=[textbox, duration, guidance_scale, seed, n_candidates],
|
| 266 |
+
outputs=[outputs],
|
| 267 |
+
cache_examples=True,
|
| 268 |
+
)
|
| 269 |
with gr.Accordion("Additional information", open=False):
|
| 270 |
gr.HTML(
|
| 271 |
+
"""
|
| 272 |
<div class="acknowledgments">
|
| 273 |
<p> We build the model with data from <a href="http://research.google.com/audioset/">AudioSet</a>, <a href="https://freesound.org/">Freesound</a> and <a href="https://sound-effects.bbcrewind.co.uk/">BBC Sound Effect library</a>. We share this demo based on the <a href="https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/375954/Research.pdf">UK copyright exception</a> of data for academic research. </p>
|
| 274 |
</div>
|
| 275 |
"""
|
| 276 |
+
)
|
| 277 |
# <p>This demo is strictly for research demo purpose only. For commercial use please <a href="[email protected]">contact us</a>.</p>
|
| 278 |
+
|
| 279 |
+
iface.queue(concurrency_count=3)
|
| 280 |
iface.launch(debug=True)
|
| 281 |
+
# iface.launch(debug=True, share=True)
|
share_btn.py
CHANGED
|
@@ -22,34 +22,48 @@ share_js = """async () => {
|
|
| 22 |
const url = await response.text();
|
| 23 |
return url;
|
| 24 |
}
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
const shareBtnEl = gradioEl.querySelector('#share-btn');
|
| 29 |
const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
|
| 30 |
const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');
|
| 31 |
-
if(!
|
| 32 |
return;
|
| 33 |
};
|
| 34 |
shareBtnEl.style.pointerEvents = 'none';
|
| 35 |
shareIconEl.style.display = 'none';
|
| 36 |
loadingIconEl.style.removeProperty('display');
|
| 37 |
-
const
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
);
|
| 46 |
-
const urls = await Promise.all(files.map((f) => uploadFile(f)));
|
| 47 |
-
const htmlImgs = urls.map(url => `<img src='${url}' width='400' height='400'>`);
|
| 48 |
-
const descriptionMd = `<div style='display: flex; flex-wrap: wrap; column-gap: 0.75rem;'>
|
| 49 |
-
${htmlImgs.join(`\n`)}
|
| 50 |
-
</div>`;
|
| 51 |
const params = new URLSearchParams({
|
| 52 |
-
title:
|
| 53 |
description: descriptionMd,
|
| 54 |
});
|
| 55 |
const paramsStr = params.toString();
|
|
@@ -57,4 +71,4 @@ ${htmlImgs.join(`\n`)}
|
|
| 57 |
shareBtnEl.style.removeProperty('pointer-events');
|
| 58 |
shareIconEl.style.removeProperty('display');
|
| 59 |
loadingIconEl.style.display = 'none';
|
| 60 |
-
}"""
|
|
|
|
| 22 |
const url = await response.text();
|
| 23 |
return url;
|
| 24 |
}
|
| 25 |
+
/**
 * Fetch the media behind an element's `src` and wrap it in a File.
 *
 * @param {{src: string}} videoEl - Element (or any object) whose `src` is the URL to download.
 * @returns {Promise<File>} A `video/mp4` File named `sd-perception-<id>.mp4`.
 */
async function getInputVideoFile(videoEl){
    const res = await fetch(videoEl.src);
    const blob = await res.blob();
    // Small rolling id (0-199) derived from the clock; not guaranteed unique.
    const videoId = Date.now() % 200;
    // Single braces: `${{videoId}}` would interpolate an object literal and
    // produce "sd-perception-[object Object].mp4".
    const fileName = `sd-perception-${videoId}.mp4`;
    return new File([blob], fileName, { type: 'video/mp4' });
}
|
| 32 |
+
|
| 33 |
+
/**
 * Read a file and return its contents as a base64 `data:` URL.
 *
 * @param {Blob} audioFile - Blob/File handed to `FileReader.readAsDataURL`.
 * @returns {Promise<string>} Resolves with the reader's result once loading succeeds.
 *     Rejects with the reader's `error` if the read fails.
 */
function audioToBase64(audioFile) {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        // Register handlers before starting the read so no event can be missed.
        reader.onload = () => resolve(reader.result);
        // Reject with the underlying error rather than the bare ProgressEvent.
        reader.onerror = () => reject(reader.error);
        reader.readAsDataURL(audioFile);
    });
}
|
| 42 |
+
const gradioEl = document.querySelector("gradio-app").shadowRoot || document.querySelector('body > gradio-app');
|
| 43 |
+
const inputPromptEl = gradioEl.querySelector('#prompt-in input').value;
|
| 44 |
+
const outputVideoEl = gradioEl.querySelector('#output-video video');
|
| 45 |
+
|
| 46 |
+
let titleTxt = `Text-to-Audio: ${inputPromptEl}`;
|
| 47 |
+
|
| 48 |
const shareBtnEl = gradioEl.querySelector('#share-btn');
|
| 49 |
const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
|
| 50 |
const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');
|
| 51 |
+
if(!outputVideoEl){
|
| 52 |
return;
|
| 53 |
};
|
| 54 |
shareBtnEl.style.pointerEvents = 'none';
|
| 55 |
shareIconEl.style.display = 'none';
|
| 56 |
loadingIconEl.style.removeProperty('display');
|
| 57 |
+
const outputVideo = await getInputVideoFile(outputVideoEl);
|
| 58 |
+
const urlOutputVideo = await uploadFile(outputVideo);
|
| 59 |
+
|
| 60 |
+
const descriptionMd = `
|
| 61 |
+
##### ${inputPromptEl}
|
| 62 |
+
|
| 63 |
+
${urlOutputVideo}
|
| 64 |
+
`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
const params = new URLSearchParams({
|
| 66 |
+
title: titleTxt,
|
| 67 |
description: descriptionMd,
|
| 68 |
});
|
| 69 |
const paramsStr = params.toString();
|
|
|
|
| 71 |
shareBtnEl.style.removeProperty('pointer-events');
|
| 72 |
shareIconEl.style.removeProperty('display');
|
| 73 |
loadingIconEl.style.display = 'none';
|
| 74 |
+
}"""
|