Add Background Image to make music easily shareable on FB as video
Files changed:
- app.py (+7, -4)
- app_batched.py (+3, -1)
- assets/background.png (+0, -0)
- audiocraft/utils/extend.py (+1, -1)
app.py
CHANGED

@@ -25,8 +25,9 @@ def load_model(version):
     return MusicGen.get_pretrained(version)


-def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef):
-    global MODEL
+def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background):
+    global MODEL
+    output_segments = None
     topk = int(topk)
     if MODEL is None or MODEL.name != model:
         MODEL = load_model(model)

@@ -77,7 +78,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef):
     audio_write(
         file.name, output, MODEL.sample_rate, strategy="loudness",
         loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-    waveform_video = gr.make_waveform(file.name)
+    waveform_video = gr.make_waveform(file.name, bg_image=background, bar_count=40)
     return waveform_video


@@ -105,6 +106,8 @@ def ui(**kwargs):
             melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
             with gr.Row():
                 submit = gr.Button("Submit")
+            with gr.Row():
+                background = gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
             with gr.Row():
                 model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
             with gr.Row():

@@ -117,7 +120,7 @@ def ui(**kwargs):
                 cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
                 output = gr.Video(label="Generated Music")
-        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef], outputs=[output])
+        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background], outputs=[output])
         gr.Examples(
             fn=predict,
             examples=[
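The user-facing effect of this change sits in `gr.make_waveform`, which renders an audio file into an MP4 of animated bars over an optional background image; because the new `background` component uses `type="filepath"`, `predict` receives a plain path string, which is exactly what `bg_image` expects. A minimal sketch of the call in isolation (the audio path is a placeholder, not part of the repo):

import gradio as gr

# Minimal sketch, separate from the app: render an existing audio file
# into a shareable MP4. bg_image draws the picture behind the bars, and
# bar_count=40 uses slightly fewer bars than Gradio's default of 50.
# "sample.wav" is a placeholder path.
video_path = gr.make_waveform(
    "sample.wav",
    bg_image="./assets/background.png",
    bar_count=40,
)
print(video_path)  # filepath of the generated .mp4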
app_batched.py
CHANGED

@@ -40,6 +40,8 @@ def predict(texts, melodies):
             processed_melodies.append(None)
         else:
             sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t()
+            duration = min(duration, melody.shape[-1] / sr)
+            MODEL.set_generation_params(duration=duration)
             if melody.dim() == 1:
                 melody = melody[None]
             melody = melody[..., :int(sr * duration)]

@@ -50,7 +52,7 @@ def predict(texts, melodies):
         descriptions=texts,
         melody_wavs=processed_melodies,
         melody_sample_rate=target_sr,
-        progress=False
+        progress=True
     )

     outputs = outputs.detach().cpu().float()
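The two added lines clamp the requested duration to the length of the uploaded melody before `set_generation_params` is called, so the model is never asked to generate past its conditioning audio; `progress=True` additionally switches on MusicGen's console progress reporting during generation. A toy illustration of the clamp, with invented values:

import torch

# Toy illustration (values invented for the example, not from the app):
# a 2-second melody at 32 kHz caps a requested 10-second generation
# at 2.0 seconds.
sr = 32000                                   # sample rate in Hz
melody = torch.zeros(1, 2 * sr)              # 2 seconds of (silent) audio
duration = 10                                # requested length in seconds
duration = min(duration, melody.shape[-1] / sr)
print(duration)                              # -> 2.0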
assets/background.png
ADDED
audiocraft/utils/extend.py
CHANGED

@@ -45,7 +45,7 @@ def generate_music_segments(text, melody, MODEL, duration:int=10, segment_duration

     # Iterate over the segments to create list of Melody tensors
     for segment_idx in range(total_segments):
-        print(f"segment {segment_idx} of {total_segments} \r")
+        print(f"segment {segment_idx + 1} of {total_segments} \r")
         sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)

         print(f"shape:{verse.shape} dim:{verse.dim()}")
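The one-line change fixes an off-by-one in the progress log: `range()` is zero-based, so the loop previously reported "segment 0 of N" for the first segment. A standalone sketch of the corrected counter:

# Standalone sketch: range() yields 0..n-1, so printing segment_idx
# directly says "segment 0 of 4"; adding 1 gives "segment 1 of 4"
# through "segment 4 of 4".
total_segments = 4
for segment_idx in range(total_segments):
    print(f"segment {segment_idx + 1} of {total_segments}")

(Note that the trailing `\r` in the app's print has no in-place-overwrite effect, since `print` appends a newline afterwards; a single-line counter would also need `end=""`.)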