Update stablecuda12build1.py
Browse files
16-bit handling and audio addition use different maths for each float type; fixed up errors.
- stablecuda12build1.py +50 -38
    	
        stablecuda12build1.py
    CHANGED
    
    | @@ -19,6 +19,7 @@ from pathlib import Path | |
| 19 | 
             
            import mmap
         | 
| 20 | 
             
            import subprocess
         | 
| 21 | 
             
            import re
         | 
|  | |
| 22 |  | 
| 23 | 
             
            # Suppress warnings for cleaner output
         | 
| 24 | 
             
            warnings.filterwarnings("ignore")
         | 
| @@ -149,13 +150,16 @@ def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=16000): | |
| 149 | 
             
                        stereo_samples = stereo_samples * mask
         | 
| 150 | 
             
                        left_nonzero = stereo_samples[:, 0][stereo_samples[:, 0] != 0]
         | 
| 151 | 
             
                        right_nonzero = stereo_samples[:, 1][stereo_samples[:, 1] != 0]
         | 
| 152 | 
            -
                        left_rms = np.sqrt(np.mean(left_nonzero**2)) if len(left_nonzero) > 0  | 
| 153 | 
             
                        right_rms = np.sqrt(np.mean(right_nonzero**2)) if len(right_nonzero) > 0 else 0
         | 
| 154 | 
             
                        if left_rms > 0 and right_rms > 0:
         | 
| 155 | 
             
                            avg_rms = (left_rms + right_rms) / 2
         | 
| 156 | 
             
                            stereo_samples[:, 0] = stereo_samples[:, 0] * (avg_rms / left_rms)
         | 
| 157 | 
             
                            stereo_samples[:, 1] = stereo_samples[:, 1] * (avg_rms / right_rms)
         | 
| 158 | 
             
                        balanced_samples = stereo_samples.flatten().astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
         | 
|  | |
|  | |
|  | |
| 159 | 
             
                        balanced_segment = AudioSegment(
         | 
| 160 | 
             
                            balanced_samples.tobytes(),
         | 
| 161 | 
             
                            frame_rate=sample_rate,
         | 
| @@ -206,6 +210,9 @@ def hard_limit(audio_segment, limit_db=-3.0, sample_rate=16000): | |
| 206 | 
             
                    limit = 10 ** (limit_db / 20.0) * (2**23 if audio_segment.sample_width == 3 else 32767)
         | 
| 207 | 
             
                    samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
         | 
| 208 | 
             
                    samples = np.clip(samples, -limit, limit).astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
         | 
|  | |
|  | |
|  | |
| 209 | 
             
                    limited_segment = AudioSegment(
         | 
| 210 | 
             
                        samples.tobytes(),
         | 
| 211 | 
             
                        frame_rate=sample_rate,
         | 
| @@ -248,12 +255,15 @@ def apply_fade(segment, fade_in_duration=500, fade_out_duration=500): | |
| 248 | 
             
            # Genre prompt functions
         | 
| 249 | 
             
            def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
         | 
| 250 | 
             
                try:
         | 
| 251 | 
            -
                    rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else (" | 
| 252 | 
            -
                    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
         | 
| 253 | 
             
                    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
         | 
| 254 | 
            -
                    bass = f", {bass_style}" if bass_style != "none" else ",  | 
| 255 | 
            -
                    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated guitar riffs"
         | 
| 256 | 
            -
                    prompt =  | 
|  | |
|  | |
|  | |
| 257 | 
             
                    logger.debug(f"Generated RHCP prompt: {prompt}")
         | 
| 258 | 
             
                    return prompt
         | 
| 259 | 
             
                except Exception as e:
         | 
| @@ -468,7 +478,8 @@ PRESETS = { | |
| 468 | 
             
                "rock": {"cfg_scale": 2.0, "top_k": 110, "top_p": 0.9, "temperature": 0.9},
         | 
| 469 | 
             
                "techno": {"cfg_scale": 1.5, "top_k": 130, "top_p": 0.85, "temperature": 0.7},
         | 
| 470 | 
             
                "grunge": {"cfg_scale": 1.8, "top_k": 120, "top_p": 0.9, "temperature": 0.85},
         | 
| 471 | 
            -
                "indie": {"cfg_scale": 1.9, "top_k": 115, "top_p": 0.9, "temperature": 0.8}
         | 
|  | |
| 472 | 
             
            }
         | 
| 473 |  | 
| 474 | 
             
            # Function to get the latest log file
         | 
| @@ -523,7 +534,7 @@ def set_bit_depth_24(): | |
| 523 | 
             
                return "24"
         | 
| 524 |  | 
| 525 | 
             
            # Optimized generation function
         | 
| 526 | 
            -
            def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, max_steps: str, vram_status: str, bitrate: str, output_sample_rate: str, bit_depth: str):
         | 
| 527 | 
             
                global musicgen_model
         | 
| 528 | 
             
                if not instrumental_prompt.strip():
         | 
| 529 | 
             
                    logger.warning("Empty instrumental prompt provided")
         | 
| @@ -550,6 +561,10 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p | |
| 550 | 
             
                    except ValueError:
         | 
| 551 | 
             
                        logger.error(f"Invalid bit_depth value: {bit_depth}")
         | 
| 552 | 
             
                        return None, "❌ Invalid bit depth; must be 16 or 24", vram_status
         | 
|  | |
|  | |
|  | |
|  | |
| 553 | 
             
                    max_duration = min(max_steps_int / 50, 30)  # Convert steps to seconds, cap at 30s
         | 
| 554 | 
             
                    total_duration = min(max(total_duration, 30), 120)  # Clamp between 30s and 120s
         | 
| 555 | 
             
                    processing_sample_rate = 16000  # Fixed for processing
         | 
| @@ -570,8 +585,6 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p | |
| 570 | 
             
                        logger.error("Insufficient disk space")
         | 
| 571 | 
             
                        return None, "⚠️ Insufficient disk space. Free up at least 1 GB.", vram_status
         | 
| 572 |  | 
| 573 | 
            -
                    # Set random seed for this generation run
         | 
| 574 | 
            -
                    seed = random.randint(0, 10000)
         | 
| 575 | 
             
                    logger.info(f"Generating audio for {total_duration}s with seed={seed}, max_steps={max_steps_int}, output_sample_rate={output_sample_rate_int} Hz, bit_depth={bit_depth_int}-bit")
         | 
| 576 | 
             
                    base_prompt = instrumental_prompt
         | 
| 577 | 
             
                    clean_memory()
         | 
| @@ -731,10 +744,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p | |
| 731 | 
             
                                logger.debug(f"Applying crossfade between chunks {i} and {i+1}")
         | 
| 732 | 
             
                                prev_overlap = final_segment[-overlap_ms:]
         | 
| 733 | 
             
                                curr_overlap = current_segment[:overlap_ms]
         | 
| 734 | 
            -
                                #  | 
| 735 | 
            -
                                prev_overlap = ensure_stereo(prev_overlap, processing_sample_rate, sample_width)
         | 
| 736 | 
            -
                                curr_overlap = ensure_stereo(curr_overlap, processing_sample_rate, sample_width)
         | 
| 737 | 
            -
                                # Calculate samples using torchaudio for precision
         | 
| 738 | 
             
                                prev_audio, _ = torchaudio.load(io.BytesIO(prev_overlap.raw_data))
         | 
| 739 | 
             
                                curr_audio, _ = torchaudio.load(io.BytesIO(curr_overlap.raw_data))
         | 
| 740 | 
             
                                num_samples = min(prev_audio.shape[1], curr_audio.shape[1])
         | 
| @@ -744,27 +754,21 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p | |
| 744 | 
             
                                    logger.warning(f"Skipping crossfade for chunk {i+1} due to insufficient samples")
         | 
| 745 | 
             
                                    final_segment += current_segment
         | 
| 746 | 
             
                                    continue
         | 
| 747 | 
            -
                                blended_samples =  | 
| 748 | 
            -
                                prev_samples = prev_audio[:, :num_samples] | 
| 749 | 
            -
                                curr_samples = curr_audio[:, :num_samples] | 
| 750 | 
            -
                                hann_window =  | 
| 751 | 
            -
                                fade_out = hann_window | 
| 752 | 
             
                                fade_in = hann_window
         | 
| 753 | 
            -
                                blended_samples = (prev_samples * fade_out | 
| 754 | 
            -
                                #  | 
| 755 | 
            -
                                blended_samples = blended_samples. | 
| 756 | 
            -
                                 | 
| 757 | 
            -
                                 | 
| 758 | 
            -
                                 | 
| 759 | 
            -
                                 | 
| 760 | 
            -
             | 
| 761 | 
            -
             | 
| 762 | 
            -
                                blended_segment = AudioSegment(
         | 
| 763 | 
            -
                                    byte_data,
         | 
| 764 | 
            -
                                    frame_rate=processing_sample_rate,
         | 
| 765 | 
            -
                                    sample_width=sample_width,
         | 
| 766 | 
            -
                                    channels=2
         | 
| 767 | 
            -
                                )
         | 
| 768 | 
             
                                blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
         | 
| 769 | 
             
                                final_segment = final_segment[:-overlap_ms] + blended_segment + current_segment[overlap_ms:]
         | 
| 770 | 
             
                            else:
         | 
| @@ -822,7 +826,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p | |
| 822 | 
             
            # Clear inputs function
         | 
| 823 | 
             
            def clear_inputs():
         | 
| 824 | 
             
                logger.info("Clearing input fields")
         | 
| 825 | 
            -
                return "", 1.8, 120, 0.9, 0.8, 30, 120, "none", "none", "none", "none", "none", -23.0, "default", 1300, "96k", "32000", "16"
         | 
| 826 |  | 
| 827 | 
             
            # Custom CSS
         | 
| 828 | 
             
            css = """
         | 
| @@ -1024,7 +1028,7 @@ with gr.Blocks(css=css) as demo: | |
| 1024 | 
             
                        )
         | 
| 1025 | 
             
                        preset = gr.Dropdown(
         | 
| 1026 | 
             
                            label="Preset Configuration 🎛️",
         | 
| 1027 | 
            -
                            choices=["default", "rock", "techno", "grunge", "indie"],
         | 
| 1028 | 
             
                            value="default",
         | 
| 1029 | 
             
                            info="Select a preset optimized for specific genres."
         | 
| 1030 | 
             
                        )
         | 
| @@ -1034,6 +1038,14 @@ with gr.Blocks(css=css) as demo: | |
| 1034 | 
             
                            value=1300,
         | 
| 1035 | 
             
                            info="Number of generation steps per chunk (1000=~20s, 1500=~30s)."
         | 
| 1036 | 
             
                        )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1037 | 
             
                        bitrate_state = gr.State(value="96k")  # Default bitrate
         | 
| 1038 | 
             
                        sample_rate_state = gr.State(value="32000")  # Default output sampling rate
         | 
| 1039 | 
             
                        bit_depth_state = gr.State(value="16")  # Default bit depth
         | 
| @@ -1088,13 +1100,13 @@ with gr.Blocks(css=css) as demo: | |
| 1088 | 
             
                bit_depth_24_btn.click(set_bit_depth_24, inputs=None, outputs=bit_depth_state)
         | 
| 1089 | 
             
                gen_btn.click(
         | 
| 1090 | 
             
                    generate_music,
         | 
| 1091 | 
            -
                    inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, vram_status, bitrate_state, sample_rate_state, bit_depth_state],
         | 
| 1092 | 
             
                    outputs=[out_audio, status, vram_status]
         | 
| 1093 | 
             
                )
         | 
| 1094 | 
             
                clr_btn.click(
         | 
| 1095 | 
             
                    clear_inputs,
         | 
| 1096 | 
             
                    inputs=None,
         | 
| 1097 | 
            -
                    outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state]
         | 
| 1098 | 
             
                )
         | 
| 1099 | 
             
                log_btn.click(
         | 
| 1100 | 
             
                    get_latest_log,
         | 
|  | |
| 19 | 
             
            import mmap
         | 
| 20 | 
             
            import subprocess
         | 
| 21 | 
             
            import re
         | 
| 22 | 
            +
            import io
         | 
| 23 |  | 
| 24 | 
             
            # Suppress warnings for cleaner output
         | 
| 25 | 
             
            warnings.filterwarnings("ignore")
         | 
|  | |
| 150 | 
             
                        stereo_samples = stereo_samples * mask
         | 
| 151 | 
             
                        left_nonzero = stereo_samples[:, 0][stereo_samples[:, 0] != 0]
         | 
| 152 | 
             
                        right_nonzero = stereo_samples[:, 1][stereo_samples[:, 1] != 0]
         | 
| 153 | 
            +
                        left_rms = np.sqrt(np.mean(left_nonzero**2)) if len(left_nonzero) > 0 else 0
         | 
| 154 | 
             
                        right_rms = np.sqrt(np.mean(right_nonzero**2)) if len(right_nonzero) > 0 else 0
         | 
| 155 | 
             
                        if left_rms > 0 and right_rms > 0:
         | 
| 156 | 
             
                            avg_rms = (left_rms + right_rms) / 2
         | 
| 157 | 
             
                            stereo_samples[:, 0] = stereo_samples[:, 0] * (avg_rms / left_rms)
         | 
| 158 | 
             
                            stereo_samples[:, 1] = stereo_samples[:, 1] * (avg_rms / right_rms)
         | 
| 159 | 
             
                        balanced_samples = stereo_samples.flatten().astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
         | 
| 160 | 
            +
                        # Ensure sample length is even for stereo
         | 
| 161 | 
            +
                        if len(balanced_samples) % 2 != 0:
         | 
| 162 | 
            +
                            balanced_samples = balanced_samples[:-1]
         | 
| 163 | 
             
                        balanced_segment = AudioSegment(
         | 
| 164 | 
             
                            balanced_samples.tobytes(),
         | 
| 165 | 
             
                            frame_rate=sample_rate,
         | 
|  | |
| 210 | 
             
                    limit = 10 ** (limit_db / 20.0) * (2**23 if audio_segment.sample_width == 3 else 32767)
         | 
| 211 | 
             
                    samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
         | 
| 212 | 
             
                    samples = np.clip(samples, -limit, limit).astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
         | 
| 213 | 
            +
                    # Ensure sample length is even for stereo
         | 
| 214 | 
            +
                    if len(samples) % 2 != 0:
         | 
| 215 | 
            +
                        samples = samples[:-1]
         | 
| 216 | 
             
                    limited_segment = AudioSegment(
         | 
| 217 | 
             
                        samples.tobytes(),
         | 
| 218 | 
             
                        frame_rate=sample_rate,
         | 
|  | |
| 255 | 
             
            # Genre prompt functions
         | 
| 256 | 
             
            def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
         | 
| 257 | 
             
                try:
         | 
| 258 | 
            +
                    rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("syncopated funk rhythms" if bpm > 120 else "groovy funk flow")
         | 
| 259 | 
            +
                    drum = f", {drum_beat} drums" if drum_beat != "none" else ", tight funk drums with punchy snares"
         | 
| 260 | 
             
                    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
         | 
| 261 | 
            +
                    bass = f", {bass_style}" if bass_style != "none" else ", prominent slap bass with funky grooves"
         | 
| 262 | 
            +
                    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated funk guitar riffs with clean and distorted tones"
         | 
| 263 | 
            +
                    prompt = (
         | 
| 264 | 
            +
                        f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Red Hot Chili Peppers-inspired vibe with high-energy slap bass, "
         | 
| 265 | 
            +
                        f"syncopated guitar riffs, dynamic breakdowns, and a raw, funky edge, {rhythm} at {bpm} BPM."
         | 
| 266 | 
            +
                    )
         | 
| 267 | 
             
                    logger.debug(f"Generated RHCP prompt: {prompt}")
         | 
| 268 | 
             
                    return prompt
         | 
| 269 | 
             
                except Exception as e:
         | 
|  | |
| 478 | 
             
                "rock": {"cfg_scale": 2.0, "top_k": 110, "top_p": 0.9, "temperature": 0.9},
         | 
| 479 | 
             
                "techno": {"cfg_scale": 1.5, "top_k": 130, "top_p": 0.85, "temperature": 0.7},
         | 
| 480 | 
             
                "grunge": {"cfg_scale": 1.8, "top_k": 120, "top_p": 0.9, "temperature": 0.85},
         | 
| 481 | 
            +
                "indie": {"cfg_scale": 1.9, "top_k": 115, "top_p": 0.9, "temperature": 0.8},
         | 
| 482 | 
            +
                "funk_rock": {"cfg_scale": 2.2, "top_k": 150, "top_p": 0.95, "temperature": 1.0}  # Enhanced for RHCP
         | 
| 483 | 
             
            }
         | 
| 484 |  | 
| 485 | 
             
            # Function to get the latest log file
         | 
|  | |
| 534 | 
             
                return "24"
         | 
| 535 |  | 
| 536 | 
             
            # Optimized generation function
         | 
| 537 | 
            +
            def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, max_steps: str, vram_status: str, bitrate: str, output_sample_rate: str, bit_depth: str, seed: int):
         | 
| 538 | 
             
                global musicgen_model
         | 
| 539 | 
             
                if not instrumental_prompt.strip():
         | 
| 540 | 
             
                    logger.warning("Empty instrumental prompt provided")
         | 
|  | |
| 561 | 
             
                    except ValueError:
         | 
| 562 | 
             
                        logger.error(f"Invalid bit_depth value: {bit_depth}")
         | 
| 563 | 
             
                        return None, "❌ Invalid bit depth; must be 16 or 24", vram_status
         | 
| 564 | 
            +
                    # Validate seed
         | 
| 565 | 
            +
                    if not (0 <= seed <= 10000):
         | 
| 566 | 
            +
                        logger.error(f"Invalid seed value: {seed}. Must be between 0 and 10000.")
         | 
| 567 | 
            +
                        return None, "❌ Invalid seed value; must be between 0 and 10000", vram_status
         | 
| 568 | 
             
                    max_duration = min(max_steps_int / 50, 30)  # Convert steps to seconds, cap at 30s
         | 
| 569 | 
             
                    total_duration = min(max(total_duration, 30), 120)  # Clamp between 30s and 120s
         | 
| 570 | 
             
                    processing_sample_rate = 16000  # Fixed for processing
         | 
|  | |
| 585 | 
             
                        logger.error("Insufficient disk space")
         | 
| 586 | 
             
                        return None, "⚠️ Insufficient disk space. Free up at least 1 GB.", vram_status
         | 
| 587 |  | 
|  | |
|  | |
| 588 | 
             
                    logger.info(f"Generating audio for {total_duration}s with seed={seed}, max_steps={max_steps_int}, output_sample_rate={output_sample_rate_int} Hz, bit_depth={bit_depth_int}-bit")
         | 
| 589 | 
             
                    base_prompt = instrumental_prompt
         | 
| 590 | 
             
                    clean_memory()
         | 
|  | |
| 744 | 
             
                                logger.debug(f"Applying crossfade between chunks {i} and {i+1}")
         | 
| 745 | 
             
                                prev_overlap = final_segment[-overlap_ms:]
         | 
| 746 | 
             
                                curr_overlap = current_segment[:overlap_ms]
         | 
| 747 | 
            +
                                # Use torchaudio for precise crossfading
         | 
|  | |
|  | |
|  | |
| 748 | 
             
                                prev_audio, _ = torchaudio.load(io.BytesIO(prev_overlap.raw_data))
         | 
| 749 | 
             
                                curr_audio, _ = torchaudio.load(io.BytesIO(curr_overlap.raw_data))
         | 
| 750 | 
             
                                num_samples = min(prev_audio.shape[1], curr_audio.shape[1])
         | 
|  | |
| 754 | 
             
                                    logger.warning(f"Skipping crossfade for chunk {i+1} due to insufficient samples")
         | 
| 755 | 
             
                                    final_segment += current_segment
         | 
| 756 | 
             
                                    continue
         | 
| 757 | 
            +
                                blended_samples = torch.zeros(2, num_samples, dtype=torch.float32)
         | 
| 758 | 
            +
                                prev_samples = prev_audio[:, :num_samples]
         | 
| 759 | 
            +
                                curr_samples = curr_audio[:, :num_samples]
         | 
| 760 | 
            +
                                hann_window = torch.hann_window(num_samples, periodic=False)
         | 
| 761 | 
            +
                                fade_out = hann_window.flip(0)
         | 
| 762 | 
             
                                fade_in = hann_window
         | 
| 763 | 
            +
                                blended_samples = (prev_samples * fade_out + curr_samples * fade_in)
         | 
| 764 | 
            +
                                # Convert to appropriate dtype for bit depth
         | 
| 765 | 
            +
                                blended_samples = (blended_samples * (2**23 if sample_width == 3 else 32767)).to(torch.int32 if sample_width == 3 else torch.int16)
         | 
| 766 | 
            +
                                # Save to temporary WAV to create AudioSegment
         | 
| 767 | 
            +
                                temp_crossfade_path = f"temp_crossfade_{int(time.time()*1000)}.wav"
         | 
| 768 | 
            +
                                torchaudio.save(temp_crossfade_path, blended_samples, processing_sample_rate, bits_per_sample=bit_depth_int)
         | 
| 769 | 
            +
                                blended_segment = AudioSegment.from_wav(temp_crossfade_path)
         | 
| 770 | 
            +
                                os.remove(temp_crossfade_path)
         | 
| 771 | 
            +
                                blended_segment = ensure_stereo(blended_segment, processing_sample_rate, sample_width)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 772 | 
             
                                blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
         | 
| 773 | 
             
                                final_segment = final_segment[:-overlap_ms] + blended_segment + current_segment[overlap_ms:]
         | 
| 774 | 
             
                            else:
         | 
|  | |
| 826 | 
             
            # Clear inputs function
         | 
| 827 | 
             
            def clear_inputs():
         | 
| 828 | 
             
                logger.info("Clearing input fields")
         | 
| 829 | 
            +
                return "", 1.8, 120, 0.9, 0.8, 30, 120, "none", "none", "none", "none", "none", -23.0, "default", 1300, "96k", "32000", "16", 0
         | 
| 830 |  | 
| 831 | 
             
            # Custom CSS
         | 
| 832 | 
             
            css = """
         | 
|  | |
| 1028 | 
             
                        )
         | 
| 1029 | 
             
                        preset = gr.Dropdown(
         | 
| 1030 | 
             
                            label="Preset Configuration 🎛️",
         | 
| 1031 | 
            +
                            choices=["default", "rock", "techno", "grunge", "indie", "funk_rock"],
         | 
| 1032 | 
             
                            value="default",
         | 
| 1033 | 
             
                            info="Select a preset optimized for specific genres."
         | 
| 1034 | 
             
                        )
         | 
|  | |
| 1038 | 
             
                            value=1300,
         | 
| 1039 | 
             
                            info="Number of generation steps per chunk (1000=~20s, 1500=~30s)."
         | 
| 1040 | 
             
                        )
         | 
| 1041 | 
            +
                        seed = gr.Slider(
         | 
| 1042 | 
            +
                            label="Random Seed 🌱",
         | 
| 1043 | 
            +
                            minimum=0,
         | 
| 1044 | 
            +
                            maximum=10000,
         | 
| 1045 | 
            +
                            value=0,
         | 
| 1046 | 
            +
                            step=1,
         | 
| 1047 | 
            +
                            info="Set a seed for reproducibility (0-10000). Change for different variations."
         | 
| 1048 | 
            +
                        )
         | 
| 1049 | 
             
                        bitrate_state = gr.State(value="96k")  # Default bitrate
         | 
| 1050 | 
             
                        sample_rate_state = gr.State(value="32000")  # Default output sampling rate
         | 
| 1051 | 
             
                        bit_depth_state = gr.State(value="16")  # Default bit depth
         | 
|  | |
| 1100 | 
             
                bit_depth_24_btn.click(set_bit_depth_24, inputs=None, outputs=bit_depth_state)
         | 
| 1101 | 
             
                gen_btn.click(
         | 
| 1102 | 
             
                    generate_music,
         | 
| 1103 | 
            +
                    inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, vram_status, bitrate_state, sample_rate_state, bit_depth_state, seed],
         | 
| 1104 | 
             
                    outputs=[out_audio, status, vram_status]
         | 
| 1105 | 
             
                )
         | 
| 1106 | 
             
                clr_btn.click(
         | 
| 1107 | 
             
                    clear_inputs,
         | 
| 1108 | 
             
                    inputs=None,
         | 
| 1109 | 
            +
                    outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state, seed]
         | 
| 1110 | 
             
                )
         | 
| 1111 | 
             
                log_btn.click(
         | 
| 1112 | 
             
                    get_latest_log,
         | 
