Surn committed on
Commit
1a6de5e
1 Parent(s): 50d48cc

Interrupt Button Update

Browse files

Stereo wav file
Improved Melody guided, partial

Files changed (3) hide show
  1. app.py +32 -11
  2. audiocraft/data/audio.py +4 -2
  3. audiocraft/utils/extend.py +9 -1
app.py CHANGED
@@ -15,17 +15,20 @@ import time
15
  import warnings
16
  from audiocraft.models import MusicGen
17
  from audiocraft.data.audio import audio_write
18
- from audiocraft.utils.extend import generate_music_segments, add_settings_to_image
19
  import numpy as np
20
  import random
21
 
22
  MODEL = None
23
  MODELS = None
24
- IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')
25
  INTERRUPTED = False
26
  UNLOAD_MODEL = False
27
  MOVE_TO_CPU = False
28
 
 
 
 
29
  def interrupt():
30
  global INTERRUPTING
31
  INTERRUPTING = True
@@ -63,9 +66,18 @@ def load_model(version):
63
 
64
 
65
  def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color, seed, overlap=1):
66
- global MODEL, INTERRUPTED
67
  output_segments = None
68
- topk = int(topk)
 
 
 
 
 
 
 
 
 
69
  if MODEL is None or MODEL.name != model:
70
  MODEL = load_model(model)
71
  else:
@@ -92,6 +104,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
92
  seed = random.randint(0, 0xffff_ffff_ffff)
93
  torch.manual_seed(seed)
94
 
 
95
  print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')
96
  MODEL.set_generation_params(
97
  use_sampling=True,
@@ -134,6 +147,12 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
134
  duration -= segment_duration - overlap
135
  output_segments.append(next_segment)
136
 
 
 
 
 
 
 
137
  if output_segments:
138
  try:
139
  # Combine the output segments into one long audio file or stack tracks
@@ -143,21 +162,22 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
143
  output = output_segments[0]
144
  for i in range(1, len(output_segments)):
145
  overlap_samples = overlap * MODEL.sample_rate
146
- output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=dimension)
147
  output = output.detach().cpu().float()[0]
148
  except Exception as e:
149
  print(f"Error combining segments: {e}. Using the first segment only.")
150
  output = output_segments[0].detach().cpu().float()[0]
151
  else:
152
  output = output.detach().cpu().float()[0]
 
153
  with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
154
  if include_settings:
155
- video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Melody File:#todo"
156
  background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
157
  audio_write(
158
  file.name, output, MODEL.sample_rate, strategy="loudness",
159
- loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
160
- waveform_video = make_waveform(file.name,bg_image=background, bar_count=40)
161
  if MOVE_TO_CPU:
162
  MODEL.to('cpu')
163
  if UNLOAD_MODEL:
@@ -177,6 +197,7 @@ def ui(**kwargs):
177
  # UnlimitedMusicGen
178
  This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
179
  presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
 
180
 
181
  Disclaimer: This won't run on CPU only. Clone this App and run on GPU instance!
182
  """
@@ -208,12 +229,12 @@ def ui(**kwargs):
208
  with gr.Row():
209
  model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
210
  with gr.Row():
211
- duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
212
  overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
213
  dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
214
  with gr.Row():
215
- topk = gr.Number(label="Top-k", value=250, interactive=True)
216
- topp = gr.Number(label="Top-p", value=0, interactive=True)
217
  temperature = gr.Number(label="Randomness Temperature", value=0.75, precision=None, interactive=True)
218
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=5.5, precision=None, interactive=True)
219
  with gr.Row():
 
15
  import warnings
16
  from audiocraft.models import MusicGen
17
  from audiocraft.data.audio import audio_write
18
+ from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
19
  import numpy as np
20
  import random
21
 
22
  MODEL = None
23
  MODELS = None
24
+ IS_SHARED_SPACE = "Surn/UnlimitedMusicGen" in os.environ.get('SPACE_ID', '')
25
  INTERRUPTED = False
26
  UNLOAD_MODEL = False
27
  MOVE_TO_CPU = False
28
 
29
+ def interrupt_callback():
30
+ return INTERRUPTED
31
+
32
  def interrupt():
33
  global INTERRUPTING
34
  INTERRUPTING = True
 
66
 
67
 
68
  def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color, seed, overlap=1):
69
+ global MODEL, INTERRUPTED, INTERRUPTING
70
  output_segments = None
71
+
72
+ INTERRUPTED = False
73
+ INTERRUPTING = False
74
+ if temperature < 0:
75
+ raise gr.Error("Temperature must be >= 0.")
76
+ if topk < 0:
77
+ raise gr.Error("Topk must be non-negative.")
78
+ if topp < 0:
79
+ raise gr.Error("Topp must be non-negative.")
80
+
81
  if MODEL is None or MODEL.name != model:
82
  MODEL = load_model(model)
83
  else:
 
104
  seed = random.randint(0, 0xffff_ffff_ffff)
105
  torch.manual_seed(seed)
106
 
107
+
108
  print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')
109
  MODEL.set_generation_params(
110
  use_sampling=True,
 
147
  duration -= segment_duration - overlap
148
  output_segments.append(next_segment)
149
 
150
+ if INTERRUPTING:
151
+ INTERRUPTED = True
152
+ INTERRUPTING = False
153
+ print("Function execution interrupted!")
154
+ raise gr.Error("Interrupted.")
155
+
156
  if output_segments:
157
  try:
158
  # Combine the output segments into one long audio file or stack tracks
 
162
  output = output_segments[0]
163
  for i in range(1, len(output_segments)):
164
  overlap_samples = overlap * MODEL.sample_rate
165
+ output = torch.cat([output[:, :, :-overlap_samples], output_segments[i]], dim=dimension)
166
  output = output.detach().cpu().float()[0]
167
  except Exception as e:
168
  print(f"Error combining segments: {e}. Using the first segment only.")
169
  output = output_segments[0].detach().cpu().float()[0]
170
  else:
171
  output = output.detach().cpu().float()[0]
172
+
173
  with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
174
  if include_settings:
175
+ video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody File:#todo"
176
  background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
177
  audio_write(
178
  file.name, output, MODEL.sample_rate, strategy="loudness",
179
+ loudness_headroom_db=19, loudness_compressor=True, add_suffix=False, channels=2)
180
+ waveform_video = make_waveform(file.name,bg_image=background, bar_count=45)
181
  if MOVE_TO_CPU:
182
  MODEL.to('cpu')
183
  if UNLOAD_MODEL:
 
197
  # UnlimitedMusicGen
198
  This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
199
  presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
200
+ Todo: Working on improved Melody Conditioned Music Generation transitions.
201
 
202
  Disclaimer: This won't run on CPU only. Clone this App and run on GPU instance!
203
  """
 
229
  with gr.Row():
230
  model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
231
  with gr.Row():
232
+ duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration", interactive=True)
233
  overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
234
  dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
235
  with gr.Row():
236
+ topk = gr.Number(label="Top-k", value=250, precision=0, interactive=True)
237
+ topp = gr.Number(label="Top-p", value=0, precision=0, interactive=True)
238
  temperature = gr.Number(label="Randomness Temperature", value=0.75, precision=None, interactive=True)
239
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=5.5, precision=None, interactive=True)
240
  with gr.Row():
audiocraft/data/audio.py CHANGED
@@ -22,7 +22,7 @@ import torchaudio as ta
22
 
23
  import av
24
 
25
- from .audio_utils import f32_pcm, i16_pcm, normalize_audio
26
 
27
 
28
  _av_initialized = False
@@ -157,7 +157,7 @@ def audio_write(stem_name: tp.Union[str, Path],
157
  rms_headroom_db: float = 18, loudness_headroom_db: float = 14,
158
  loudness_compressor: bool = False,
159
  log_clipping: bool = True, make_parent_dir: bool = True,
160
- add_suffix: bool = True) -> Path:
161
  """Convenience function for saving audio to disk. Returns the filename the audio was written to.
162
 
163
  Args:
@@ -190,6 +190,8 @@ def audio_write(stem_name: tp.Union[str, Path],
190
  wav = normalize_audio(wav, normalize, strategy, peak_clip_headroom_db,
191
  rms_headroom_db, loudness_headroom_db, log_clipping=log_clipping,
192
  sample_rate=sample_rate, stem_name=str(stem_name))
 
 
193
  kwargs: dict = {}
194
  if format == 'mp3':
195
  suffix = '.mp3'
 
22
 
23
  import av
24
 
25
+ from .audio_utils import f32_pcm, i16_pcm, normalize_audio, convert_audio
26
 
27
 
28
  _av_initialized = False
 
157
  rms_headroom_db: float = 18, loudness_headroom_db: float = 14,
158
  loudness_compressor: bool = False,
159
  log_clipping: bool = True, make_parent_dir: bool = True,
160
+ add_suffix: bool = True, channels:int = 1) -> Path:
161
  """Convenience function for saving audio to disk. Returns the filename the audio was written to.
162
 
163
  Args:
 
190
  wav = normalize_audio(wav, normalize, strategy, peak_clip_headroom_db,
191
  rms_headroom_db, loudness_headroom_db, log_clipping=log_clipping,
192
  sample_rate=sample_rate, stem_name=str(stem_name))
193
+ if channels > 1:
194
+ wav = convert_audio(wav,sample_rate, sample_rate, channels)
195
  kwargs: dict = {}
196
  if format == 'mp3':
197
  suffix = '.mp3'
audiocraft/utils/extend.py CHANGED
@@ -11,6 +11,9 @@ import requests
11
  from io import BytesIO
12
  from huggingface_hub import hf_hub_download
13
 
 
 
 
14
  def separate_audio_segments(audio, segment_duration=30, overlap=1):
15
  sr, audio_data = audio[0], audio[1]
16
 
@@ -65,6 +68,8 @@ def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:
65
 
66
  # Iterate over the segments to create list of Meldoy tensors
67
  for segment_idx in range(total_segments):
 
 
68
  print(f"segment {segment_idx + 1} of {total_segments} \r")
69
  sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)
70
 
@@ -77,6 +82,9 @@ def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:
77
 
78
  torch.manual_seed(seed)
79
  for idx, verse in enumerate(melodys):
 
 
 
80
  print(f"Generating New Melody Segment {idx + 1}: {text}\r")
81
  if output_segments:
82
  # If this isn't the first segment, use the last chunk of the previous segment as the input
@@ -166,7 +174,7 @@ def load_font(font_name, font_size=16):
166
 
167
  if font is None:
168
  try:
169
- font_path = ImageFont.truetype(hf_hub_download(repo_id="Surn/UnlimitedMusicGen", filename="assets/" + font_name, repo_type="space"), encoding="UTF-8")
170
  font = ImageFont.truetype(font_path, font_size)
171
  except (FileNotFoundError, OSError):
172
  print("Font not found. Trying to download from local assets folder...\n")
 
11
  from io import BytesIO
12
  from huggingface_hub import hf_hub_download
13
 
14
+
15
+ INTERRUPTING = False
16
+
17
  def separate_audio_segments(audio, segment_duration=30, overlap=1):
18
  sr, audio_data = audio[0], audio[1]
19
 
 
68
 
69
  # Iterate over the segments to create list of Meldoy tensors
70
  for segment_idx in range(total_segments):
71
+ if INTERRUPTING:
72
+ return [], duration
73
  print(f"segment {segment_idx + 1} of {total_segments} \r")
74
  sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)
75
 
 
82
 
83
  torch.manual_seed(seed)
84
  for idx, verse in enumerate(melodys):
85
+ if INTERRUPTING:
86
+ return output_segments, duration - (segment_duration * len(output_segments))
87
+
88
  print(f"Generating New Melody Segment {idx + 1}: {text}\r")
89
  if output_segments:
90
  # If this isn't the first segment, use the last chunk of the previous segment as the input
 
174
 
175
  if font is None:
176
  try:
177
+ font_path = ImageFont.truetype(hf_hub_download(repo_id=os.environ.get('SPACE_ID', ''), filename="assets/" + font_name, repo_type="space"), encoding="UTF-8")
178
  font = ImageFont.truetype(font_path, font_size)
179
  except (FileNotFoundError, OSError):
180
  print("Font not found. Trying to download from local assets folder...\n")