Spaces:
Runtime error
Runtime error
Update style.css (#1)
Browse files
- Update style.css (ca84b15e6e9b7a875514b0f3b3f8eac7978aab58)
- Update app.py (899e1ccfd908e1924bc30c7488e1f1146fa6b456)
- Update app.py (6fb4d8917317b55b6b4dacb436bb1f7068d89bca)
app.py
CHANGED
|
@@ -153,6 +153,10 @@ Demo for the text-based editing method introduced in:
|
|
| 153 |
<a href="https://arxiv.org/abs/2402.10009" style="text-decoration: underline;" target="_blank"> Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion </a>
|
| 154 |
</p>
|
| 155 |
<p style="font-size:larger">
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
<b>Instructions:</b><br>
|
| 157 |
Provide an input audio and a target prompt to edit the audio. <br>
|
| 158 |
T<sub>start</sub> is used to control the tradeoff between fidelity to the original signal and text-adherence.
|
|
@@ -169,7 +173,6 @@ For faster inference without waiting in queue, you may duplicate the space and u
|
|
| 169 |
<a href="https://huggingface.co/spaces/hilamanor/audioEditing?duplicate=true">
|
| 170 |
<img style="margin-top: 0em; margin-bottom: 0em; display:inline" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" ></a>
|
| 171 |
</p>
|
| 172 |
-
|
| 173 |
"""
|
| 174 |
|
| 175 |
with gr.Blocks(css='style.css') as demo:
|
|
@@ -187,16 +190,14 @@ with gr.Blocks(css='style.css') as demo:
|
|
| 187 |
do_inversion = gr.State(value=True) # To save some runtime when editing the same thing over and over
|
| 188 |
|
| 189 |
with gr.Row():
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
input_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio",
|
| 194 |
-
interactive=True, scale=1)
|
| 195 |
|
| 196 |
-
|
| 197 |
-
tar_prompt = gr.Textbox(label="
|
| 198 |
lines=2, interactive=True)
|
| 199 |
-
|
| 200 |
|
| 201 |
with gr.Row():
|
| 202 |
with gr.Column():
|
|
@@ -204,13 +205,16 @@ with gr.Blocks(css='style.css') as demo:
|
|
| 204 |
|
| 205 |
with gr.Row():
|
| 206 |
t_start = gr.Slider(minimum=30, maximum=160, value=110, step=1, label="T-start", interactive=True, scale=3,
|
| 207 |
-
info="Higher T-start -> stronger edit. Lower T-start ->
|
| 208 |
model_id = gr.Dropdown(label="AudioLDM2 Version", choices=["cvssp/audioldm2",
|
| 209 |
"cvssp/audioldm2-large",
|
| 210 |
"cvssp/audioldm2-music"],
|
| 211 |
-
info="Choose a checkpoint suitable for your intended audio and edit
|
| 212 |
value="cvssp/audioldm2-music", interactive=True, type="value", scale=2)
|
| 213 |
with gr.Accordion("More Options", open=False):
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
with gr.Row():
|
| 216 |
cfg_scale_src = gr.Number(value=3, minimum=0.5, maximum=25, precision=None,
|
|
|
|
| 153 |
<a href="https://arxiv.org/abs/2402.10009" style="text-decoration: underline;" target="_blank"> Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion </a>
|
| 154 |
</p>
|
| 155 |
<p style="font-size:larger">
|
| 156 |
+
|
| 157 |
+
"""
|
| 158 |
+
|
| 159 |
+
help = """
|
| 160 |
<b>Instructions:</b><br>
|
| 161 |
Provide an input audio and a target prompt to edit the audio. <br>
|
| 162 |
T<sub>start</sub> is used to control the tradeoff between fidelity to the original signal and text-adherence.
|
|
|
|
| 173 |
<a href="https://huggingface.co/spaces/hilamanor/audioEditing?duplicate=true">
|
| 174 |
<img style="margin-top: 0em; margin-bottom: 0em; display:inline" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" ></a>
|
| 175 |
</p>
|
|
|
|
| 176 |
"""
|
| 177 |
|
| 178 |
with gr.Blocks(css='style.css') as demo:
|
|
|
|
| 190 |
do_inversion = gr.State(value=True) # To save some runtime when editing the same thing over and over
|
| 191 |
|
| 192 |
with gr.Row():
|
| 193 |
+
input_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio",
|
| 194 |
+
interactive=True, scale=1)
|
| 195 |
+
output_audio = gr.Audio(label="Edited Audio", interactive=False, scale=1)
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
with gr.Row():
|
| 198 |
+
tar_prompt = gr.Textbox(label="Prompt", info="Describe your desired edited output", placeholder="a recording of a happy upbeat arcade game soundtrack",
|
| 199 |
lines=2, interactive=True)
|
| 200 |
+
|
| 201 |
|
| 202 |
with gr.Row():
|
| 203 |
with gr.Column():
|
|
|
|
| 205 |
|
| 206 |
with gr.Row():
|
| 207 |
t_start = gr.Slider(minimum=30, maximum=160, value=110, step=1, label="T-start", interactive=True, scale=3,
|
| 208 |
+
info="Higher T-start -> stronger edit. Lower T-start -> closer to original audio")
|
| 209 |
model_id = gr.Dropdown(label="AudioLDM2 Version", choices=["cvssp/audioldm2",
|
| 210 |
"cvssp/audioldm2-large",
|
| 211 |
"cvssp/audioldm2-music"],
|
| 212 |
+
info="Choose a checkpoint suitable for your intended audio and edit",
|
| 213 |
value="cvssp/audioldm2-music", interactive=True, type="value", scale=2)
|
| 214 |
with gr.Accordion("More Options", open=False):
|
| 215 |
+
with gr.Row():
|
| 216 |
+
src_prompt = gr.Textbox(label="Source Prompt", lines=2, interactive=True, info= "Optional: Describe the original audio input",
|
| 217 |
+
placeholder="A recording of a happy upbeat classical music piece",)
|
| 218 |
|
| 219 |
with gr.Row():
|
| 220 |
cfg_scale_src = gr.Number(value=3, minimum=0.5, maximum=25, precision=None,
|
style.css
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
.gradio-container {
|
| 2 |
-
max-width:
|
| 3 |
padding-top: 1.5rem !important;
|
| 4 |
}
|
|
|
|
| 1 |
.gradio-container {
|
| 2 |
+
max-width: 700px !important;
|
| 3 |
padding-top: 1.5rem !important;
|
| 4 |
}
|