Prompt conditioning sample segments (-1 conditions each verse)

Update title to Melody Conditioning file name upon load
Separate title and settings inclusions to background
Fixed a bug in my 6/19 code... stupid logical mistake
- app.py +70 -16
- assets/favicon.ico +0 -0
- audiocraft/utils/extend.py +46 -33
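
In plain terms: a melody longer than one segment is split into 30-second "verses", one verse seeds a prompt segment, and that prompt conditions every generated segment. Below is a minimal sketch of the conditioning loop this commit adds to audiocraft/utils/extend.py (hypothetical names: generate() stands in for MODEL.generate_with_all, verses for the segment list from separate_audio_segments):

def generate_segments(text, verses, prompt_index, generate):
    # Pick the verse that seeds the shared prompt segment; an index past the
    # end falls back to the last verse, anything <= 0 falls back to the first.
    seed_verse = verses[min(max(prompt_index, 0), len(verses) - 1)]
    prompt_segment = generate(text, melody=seed_verse, prompt=None)

    outputs = []
    for verse in verses:
        output = generate(text, melody=verse, prompt=prompt_segment)
        if prompt_index < 0:
            # -1: re-seed the prompt from the previous output instead of
            # reusing one fixed prompt segment for every verse.
            prompt_segment = output
        outputs.append(output)
    return outputs

With prompt_index >= 0 every verse shares one fixed prompt segment for consistency; with -1 each verse chains off the output before it.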
    	
app.py
CHANGED

@@ -19,6 +19,8 @@ from audiocraft.data.audio_utils import apply_fade, apply_tafade
 from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
 import numpy as np
 import random
+from pathlib import Path
+from typing import List, Union
 
 MODEL = None
 MODELS = None
@@ -26,6 +28,7 @@ IS_SHARED_SPACE = "Surn/UnlimitedMusicGen" in os.environ.get('SPACE_ID', '')
 INTERRUPTED = False
 UNLOAD_MODEL = False
 MOVE_TO_CPU = False
+MAX_PROMPT_INDEX = 0
 
 def interrupt_callback():
     return INTERRUPTED
@@ -65,11 +68,53 @@ def load_model(version):
         print("Cached model loaded in %.2fs" % (time.monotonic() - t1))
         return result
 
+def get_filename(file):
+    # extract filename from file object
+    filename = None
+    if file is not None:
+        filename = file.name
+    return filename
+
+def get_filename_from_filepath(filepath):
+    file_name = os.path.basename(filepath)
+    file_base, file_extension = os.path.splitext(file_name)
+    return file_base, file_extension
+
+def load_melody_filepath(melody_filepath, title):
+    # get melody filename
+    #$Union[str, os.PathLike]
+    symbols = ['_', '.', '-']
+    if melody_filepath is None:
+        return None, title
+
+    if (title is None) or ("MusicGen" in title) or (title == ""):
+        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
+        # fix melody name for symbols
+        for symbol in symbols:
+            melody_name = melody_name.replace(symbol, ' ').title()
+    else:
+        melody_name = title
+
+    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}\n")
+
+    return gr.Audio.update(value=melody_filepath), gr.Textbox.update(value=melody_name)
+
+def load_melody(melody, prompt_index):
+    # get melody length in number of segments and modify the UI
+    if melody is None:
+        return prompt_index
+    sr, melody_data = melody[0], melody[1]
+    segment_samples = sr * 30
+    total_melodys = max(min((len(melody_data) // segment_samples) - 1, 25), 0)
+    print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
+    MAX_PROMPT_INDEX = total_melodys
+    return gr.Slider.update(maximum=MAX_PROMPT_INDEX, value=0, visible=True)
+
+def predict(model, text, melody, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index=0, include_title=True, include_settings=True):
+    global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
     output_segments = None
+    melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
     INTERRUPTED = False
     INTERRUPTING = False
     if temperature < 0:
@@ -126,7 +171,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
         if melody:
             # todo return excess duration, load next model and continue in loop structure building up output_segments
             if duration > MODEL.lm.cfg.dataset.segment_duration:
-                output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration)
+                output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index)
             else:
                 # pure original code
                 sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
@@ -191,10 +236,10 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
     else:
         output = output.detach().cpu().float()[0]
 
-    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
-        background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
+    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+        video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Prompt index: {prompt_index}"
+        if include_settings or include_title:
+            background = add_settings_to_image(title if include_title else "", video_description if include_settings else "", background_path=background, font=settings_font, font_color=settings_font_color)
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
@@ -210,6 +255,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
 def ui(**kwargs):
     css="""
     #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
+    #aud-melody {height: 0; width:0; visibility: hidden;}
    a {text-decoration-line: underline; font-weight: 600;}
     """
     with gr.Blocks(title="UnlimitedMusicGen", css=css) as demo:
@@ -235,15 +281,20 @@ def ui(**kwargs):
         with gr.Row():
             with gr.Column():
                 with gr.Row():
-                    text = gr.Text(label="
+                    text = gr.Text(label="Prompt Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
+                    with gr.Column():
+                        melody_filepath = gr.Audio(source="upload", type="filepath", label="Melody Condition (optional)", interactive=True)
+                        melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True, visible=True, elem_id="aud-melody")  #.style("display: none;height: 0; width:0;")
+                        prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 30 second segment to condition with, -1 conditions each segment independently")
                 with gr.Row():
                     submit = gr.Button("Submit")
                     # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
                     _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
                 with gr.Row():
                     background = gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
+                    with gr.Column():
+                        include_title = gr.Checkbox(label="Add Title", value=True, interactive=True)
+                        include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
                 with gr.Row():
                     title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
                     settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
@@ -252,7 +303,7 @@ def ui(**kwargs):
                     model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
                     duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration", interactive=True)
-                    overlap = gr.Slider(minimum=1, maximum=15, value=
+                    overlap = gr.Slider(minimum=1, maximum=15, value=3, step=1, label="Overlap", interactive=True)
                     dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segments of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
                 with gr.Row():
                     topk = gr.Number(label="Top-k", value=250, precision=0, interactive=True)
@@ -267,8 +318,10 @@ def ui(**kwargs):
             output = gr.Video(label="Generated Music")
             seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
 
+        melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title], outputs=[melody, title], api_name="melody_filepath_change").success(load_melody, inputs=[melody, prompt_index], outputs=[prompt_index])
+        melody.change(load_melody, inputs=[melody, prompt_index], outputs=[prompt_index], api_name="melody_change")
+        reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed")
+        submit.click(predict, inputs=[model, text, melody, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings], outputs=[output, seed_used], api_name="submit")
         gr.Examples(
             fn=predict,
             examples=[
@@ -307,10 +360,11 @@ def ui(**kwargs):
     share = kwargs.get('share', False)
     if share:
         launch_kwargs['share'] = share
+    launch_kwargs['favicon_path'] = "./assets/favicon.ico"
 
 
 
-    demo.queue(max_size=
+    demo.queue(max_size=12).launch(**launch_kwargs)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
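
For orientation, here is a stripped-down sketch of the melody-upload event chain wired up above (Gradio 3.x style, matching the gr.Audio.update/gr.Slider.update calls in the diff; the handlers are simplified stand-ins for load_melody_filepath and load_melody):

import os
import gradio as gr

def on_filepath_change(filepath, title):
    # Mirror of load_melody_filepath: push the file into the hidden numpy
    # Audio component and derive a title from the file name if none is set.
    if filepath is None:
        return None, title
    name = os.path.splitext(os.path.basename(filepath))[0].replace("_", " ").title()
    return gr.Audio.update(value=filepath), gr.Textbox.update(value=title or name)

def on_melody_change(melody, prompt_index):
    # Mirror of load_melody: size the slider to the number of 30 s segments.
    if melody is None:
        return prompt_index
    sr, data = melody
    segments = max(min(len(data) // (sr * 30) - 1, 25), 0)
    return gr.Slider.update(maximum=segments, value=0, visible=True)

with gr.Blocks() as demo:
    title = gr.Textbox(label="Title")
    melody_filepath = gr.Audio(source="upload", type="filepath", label="Melody Condition")
    melody = gr.Audio(source="upload", type="numpy", elem_id="aud-melody")
    prompt_index = gr.Slider(minimum=-1, maximum=0, step=1, value=0,
                             label="Melody Condition Sample Segment")
    # .success() runs the second handler only if the first finished without error.
    melody_filepath.change(
        on_filepath_change, inputs=[melody_filepath, title], outputs=[melody, title]
    ).success(on_melody_change, inputs=[melody, prompt_index], outputs=[prompt_index])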
    	
        assets/favicon.ico
    ADDED
    
    	
audiocraft/utils/extend.py
CHANGED

@@ -18,7 +18,7 @@ INTERRUPTING = False
 def separate_audio_segments(audio, segment_duration=30, overlap=1):
     sr, audio_data = audio[0], audio[1]
 
-    total_samples =
+    total_samples = len(audio_data)
     segment_samples = sr * segment_duration
     overlap_samples = sr * overlap
 
@@ -43,15 +43,16 @@ def separate_audio_segments(audio, segment_duration=30, overlap=1):
     print(f"separate_audio_segments: {len(segments)} segments")
     return segments
 
-def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:int=1, segment_duration:int=30):
+def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:int=1, segment_duration:int=30, prompt_index:int=0):
     # generate audio segments
     melody_segments = separate_audio_segments(melody, segment_duration, 0)
 
-    # Create
+    # Create lists to store the melody tensors for each segment
     melodys = []
     output_segments = []
     last_chunk = []
     text += ", seed=" + str(seed)
+    prompt_segment = None
 
     # Calculate the total number of segments
     total_segments = max(math.ceil(duration / segment_duration), 1)
@@ -94,55 +95,63 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
         melodys.append(verse)
 
     torch.manual_seed(seed)
+
+    # If user selects a prompt segment, generate a new prompt segment to use on all segments
+    # default to the first segment for prompt conditioning
+    prompt_verse = melodys[0]
+    if prompt_index > 0:
+        # Get a prompt segment from the selected verse, normally the first verse
+        prompt_verse = melodys[prompt_index if prompt_index <= (total_segments - 1) else (total_segments - 1)]
+
+    # set the prompt segment MODEL generation params
+    MODEL.set_generation_params(
+        use_sampling=True,
+        top_k=MODEL.generation_params["top_k"],
+        top_p=MODEL.generation_params["top_p"],
+        temperature=MODEL.generation_params["temp"],
+        cfg_coef=MODEL.generation_params["cfg_coef"],
+        duration=segment_duration,
+        two_step_cfg=False,
+        rep_penalty=0.5
+    )
+    # Generate a new prompt segment. This will be applied to all segments for consistency
+    print(f"Generating New Prompt Segment: {text} from verse {prompt_index}\r")
+    prompt_segment = MODEL.generate_with_all(
+        descriptions=[text],
+        melody_wavs=prompt_verse,
+        sample_rate=sr,
+        progress=False,
+        prompt=None,
+    )
+
     for idx, verse in enumerate(melodys):
         if INTERRUPTING:
             return output_segments, duration
 
         print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap} Overlap Loss: {duration_loss}')
         # Compensate for the length of final segment
-        if (idx + 1) == len(melodys):
+        if ((idx + 1) == len(melodys)) or (duration < segment_duration):
+            mod_duration = max(min(duration, segment_duration), 1)
+            print(f'Modify verse length, duration: {duration}, overlap: {overlap} Overlap Loss: {duration_loss} to mod duration: {mod_duration}')
             MODEL.set_generation_params(
                 use_sampling=True,
                 top_k=MODEL.generation_params["top_k"],
                 top_p=MODEL.generation_params["top_p"],
                 temperature=MODEL.generation_params["temp"],
                 cfg_coef=MODEL.generation_params["cfg_coef"],
-                duration=
+                duration=mod_duration,
                 two_step_cfg=False,
                 rep_penalty=0.5
             )
             try:
                 # get last chunk
-                verse = verse[:, :, -
-                prompt_segment = prompt_segment[:, :, -
+                verse = verse[:, :, -mod_duration*MODEL.sample_rate:]
+                prompt_segment = prompt_segment[:, :, -mod_duration*MODEL.sample_rate:]
             except:
                 # get first chunk
-                verse = verse[:, :, :
-                prompt_segment = prompt_segment[:, :, :
-
-        else:
-            MODEL.set_generation_params(
-                use_sampling=True,
-                top_k=MODEL.generation_params["top_k"],
-                top_p=MODEL.generation_params["top_p"],
-                temperature=MODEL.generation_params["temp"],
-                cfg_coef=MODEL.generation_params["cfg_coef"],
-                duration=segment_duration,
-                two_step_cfg=False,
-                rep_penalty=0.5
-            )
-
-        # Generate a new prompt segment based on the first verse. This will be applied to all segments for consistency
-        if idx == 0:
-            print(f"Generating New Prompt Segment: {text}\r")
-            prompt_segment = MODEL.generate_with_all(
-                descriptions=[text],
-                melody_wavs=verse,
-                sample_rate=sr,
-                progress=False,
-                prompt=None,
-            )
+                verse = verse[:, :, :mod_duration*MODEL.sample_rate]
+                prompt_segment = prompt_segment[:, :, :mod_duration*MODEL.sample_rate]
+
 
         print(f"Generating New Melody Segment {idx + 1}: {text}\r")
         output = MODEL.generate_with_all(
@@ -152,6 +161,10 @@ def generate_music_segments(text, melody, seed, MODEL, duration:int=10, overlap:
             progress=False,
             prompt=prompt_segment,
         )
+        # If user selects a prompt segment, use the prompt segment for all segments
+        # Otherwise, use the previous segment as the prompt
+        if prompt_index < 0:
+            prompt_segment = output
 
         # Append the generated output to the list of segments
         #output_segments.append(output[:, :segment_duration])
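
To make the final-segment trimming concrete, here is the tail-slice arithmetic on a dummy tensor (a sketch: shapes are illustrative, and 32 kHz is assumed as MusicGen's output sample rate):

import torch

sample_rate = 32000      # assumed MusicGen sample rate
segment_duration = 30    # seconds per segment
duration = 7             # seconds still owed on the final segment

# Clamp the final segment's length to [1, segment_duration] seconds,
# as the new mod_duration logic does.
mod_duration = max(min(duration, segment_duration), 1)

verse = torch.zeros(1, 1, segment_duration * sample_rate)   # [batch, channels, samples]
# Keep only the last mod_duration seconds of the conditioning melody.
verse = verse[:, :, -mod_duration * sample_rate:]
print(verse.shape)   # torch.Size([1, 1, 224000])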
