ACE-Singer

Runtime error

App Files Files Community

ginipick committed on May 26

Commit

9681ed4

verified ·

1 Parent(s): 0141981

Update ui/components.py

Browse files

Files changed (1) hide show

ui/components.py +841 -851

ui/components.py CHANGED Viewed

@@ -1,11 +1,3 @@
-"""
-ACE-Step: A Step Towards Music Generation Foundation Model
-https://github.com/ace-step/ACE-Step
-Apache 2.0 License
-"""
 import gradio as gr
 import librosa
 import os
@@ -150,7 +142,7 @@ QUALITY_PRESETS = {
     "High Quality": {
         "infer_step": 200,
         "guidance_scale": 18.0,
-        "scheduler_type": "heun",
         "omega_scale": 15.0,
         "use_erg_diffusion": True,
         "use_erg_tag": True,
@@ -159,7 +151,7 @@ QUALITY_PRESETS = {
     "Ultra (Best)": {
         "infer_step": 299,
         "guidance_scale": 20.0,
-        "scheduler_type": "heun",
         "omega_scale": 20.0,
         "use_erg_diffusion": True,
         "use_erg_tag": True,
@@ -398,510 +390,268 @@ def create_text2music_ui(
     enhanced_process_func = create_enhanced_process_func(text2music_process_func)
     with gr.Row():
-        with gr.Column():
-            # 품질 및 성능 설정 섹션 추가
             with gr.Group():
-                gr.Markdown("### ⚡ 품질 & 성능 설정")
-                with gr.Row():
-                    quality_preset = gr.Dropdown(
-                        choices=list(QUALITY_PRESETS.keys()),
-                        value="Standard",
-                        label="품질 프리셋",
-                        scale=2
-                    )
-                    multi_seed_mode = gr.Dropdown(
-                        choices=list(MULTI_SEED_OPTIONS.keys()),
-                        value="Single",
-                        label="다중 생성 모드",
-                        scale=2,
-                        info="여러 번 생성하여 최고 품질 선택"
-                    )
-                preset_description = gr.Textbox(
-                    value=QUALITY_PRESETS["Standard"]["description"],
-                    label="설명",
-                    interactive=False,
-                    max_lines=1
-                )
-            with gr.Row(equal_height=True):
-                audio_duration = gr.Slider(
-                    -1,
-                    240.0,
-                    step=0.00001,
-                    value=-1,
-                    label="Audio Duration",
-                    interactive=True,
-                    info="-1 means random duration (30 ~ 240).",
-                    scale=7,
-                )
-                sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
-                preview_bnt = gr.Button("🎵 Preview", variant="secondary", scale=2)
-            # audio2audio
-            with gr.Row(equal_height=True):
-                audio2audio_enable = gr.Checkbox(
-                    label="Enable Audio2Audio",
-                    value=False,
-                    info="Check to enable Audio-to-Audio generation using a reference audio.",
-                    elem_id="audio2audio_checkbox"
-                )
-                lora_name_or_path = gr.Dropdown(
-                    label="Lora Name or Path",
-                    choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
-                    value="none",
-                    allow_custom_value=True,
-                )
-            ref_audio_input = gr.Audio(
-                type="filepath",
-                label="Reference Audio (for Audio2Audio)",
-                visible=False,
-                elem_id="ref_audio_input",
-                show_download_button=True
-            )
-            ref_audio_strength = gr.Slider(
-                label="Refer audio strength",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.01,
-                value=0.5,
-                elem_id="ref_audio_strength",
-                visible=False,
-                interactive=True,
-            )
-            def toggle_ref_audio_visibility(is_checked):
-                return (
-                    gr.update(visible=is_checked, elem_id="ref_audio_input"),
-                    gr.update(visible=is_checked, elem_id="ref_audio_strength"),
-                )
-            audio2audio_enable.change(
-                fn=toggle_ref_audio_visibility,
-                inputs=[audio2audio_enable],
-                outputs=[ref_audio_input, ref_audio_strength],
-            )
-            with gr.Column(scale=2):
-                with gr.Group():
-                    gr.Markdown("""### 🎼 스마트 프롬프트 시스템
-                    <center>장르 선택 시 자동으로 최적화된 태그가 추가됩니다. 콤마로 구분하여 태그를 입력하세요.</center>""")
-                    with gr.Row():
                         genre_preset = gr.Dropdown(
                             choices=["Custom"] + list(GENRE_PRESETS.keys()),
                             value="Custom",
-                            label="장르 프리셋",
-                            scale=1,
                         )
                         enable_smart_enhancement = gr.Checkbox(
-                            label="스마트 향상",
                             value=True,
-                            info="자동 태그 최적화",
-                            scale=1
                         )
-                    prompt = gr.Textbox(
-                        lines=2,
-                        label="Tags",
-                        max_lines=4,
-                        value=TAG_DEFAULT,
-                        placeholder="콤마로 구분된 태그들...",
-                    )
             with gr.Group():
-                gr.Markdown("""### 📝 가사 입력
-                <center>구조 태그 [verse], [chorus], [bridge] 사용을 권장합니다.<br>[instrumental] 또는 [inst]를 사용하면 연주곡을 생성합니다.</center>""")
-                # --- 새로운 UI 요소: 주제 입력 후 가사 자동 생성 ---
                 with gr.Row():
-                    topic_for_lyrics = gr.Textbox(
-                        lines=1,
-                        label="가사 주제",
-                        placeholder="예) 첫사랑 이별, 여름 바다, 가을 낙엽 등..."
-                    )
-                    generate_lyrics_btn = gr.Button("가사 생성", variant="secondary")
-                # 사용자 직접 입력 가사 박스
                 lyrics = gr.Textbox(
-                    lines=9,
-                    label="Lyrics",
-                    max_lines=13,
                     value=LYRIC_DEFAULT,
-                    placeholder="가사를 입력하세요. [verse], [chorus] 등의 구조 태그 사용을 권장합니다."
-                )
-                # OpenAI를 통해 가사 자동 생성하는 함수
-                def generate_lyrics_ui(topic_text):
-                    # OpenAI 호출
-                    generated = openai_generate_lyrics(topic_text)
-                    return generated
-                # 가사 생성 버튼 클릭 시, lyrics 박스에 반영
-                generate_lyrics_btn.click(
-                    fn=generate_lyrics_ui,
-                    inputs=[topic_for_lyrics],
-                    outputs=[lyrics]
-                )
-            with gr.Accordion("Basic Settings", open=False):
-                infer_step = gr.Slider(
-                    minimum=1,
-                    maximum=300,
-                    step=1,
-                    value=150,
-                    label="Infer Steps",
-                    interactive=True,
-                )
-                guidance_scale = gr.Slider(
-                    minimum=0.0,
-                    maximum=30.0,
-                    step=0.1,
-                    value=15.0,
-                    label="Guidance Scale",
-                    interactive=True,
-                    info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
-                )
-                guidance_scale_text = gr.Slider(
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,
-                    label="Guidance Scale Text",
-                    interactive=True,
-                    info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
-                )
-                guidance_scale_lyric = gr.Slider(
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,
-                    label="Guidance Scale Lyric",
-                    interactive=True,
                 )
-                manual_seeds = gr.Textbox(
-                    label="manual seeds (default None)",
-                    placeholder="1,2,3,4",
-                    value=None,
-                    info="Seed for the generation",
-                )
-            with gr.Accordion("Advanced Settings", open=False):
-                scheduler_type = gr.Radio(
-                    ["euler", "heun"],
-                    value="euler",
-                    label="Scheduler Type",
-                    elem_id="scheduler_type",
-                    info="Scheduler type for the generation. euler is recommended. heun will take more time.",
-                )
-                cfg_type = gr.Radio(
-                    ["cfg", "apg", "cfg_star"],
-                    value="apg",
-                    label="CFG Type",
-                    elem_id="cfg_type",
-                    info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
-                )
-                use_erg_tag = gr.Checkbox(
-                    label="use ERG for tag",
-                    value=True,
-                    info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
                 )
-                use_erg_lyric = gr.Checkbox(
-                    label="use ERG for lyric",
-                    value=False,
-                    info="The same but apply to lyric encoder's attention.",
                 )
-                use_erg_diffusion = gr.Checkbox(
-                    label="use ERG for diffusion",
-                    value=True,
-                    info="The same but apply to diffusion model's attention.",
                 )
-                omega_scale = gr.Slider(
-                    minimum=-100.0,
-                    maximum=100.0,
-                    step=0.1,
-                    value=10.0,
-                    label="Granularity Scale",
-                    interactive=True,
-                    info="Granularity scale for the generation. Higher values can reduce artifacts",
-                )
-                guidance_interval = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     step=0.01,
                     value=0.5,
-                    label="Guidance Interval",
-                    interactive=True,
-                    info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
-                )
-                guidance_interval_decay = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=0.0,
-                    label="Guidance Interval Decay",
-                    interactive=True,
-                    info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
-                )
-                min_guidance_scale = gr.Slider(
-                    minimum=0.0,
-                    maximum=200.0,
-                    step=0.1,
-                    value=3.0,
-                    label="Min Guidance Scale",
-                    interactive=True,
-                    info="Min guidance scale for guidance interval decay's end scale",
-                )
-                oss_steps = gr.Textbox(
-                    label="OSS Steps",
-                    placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
-                    value=None,
-                    info="Optimal Steps for the generation. But not test well",
                 )
-            text2music_bnt = gr.Button("🎵 Generate Music", variant="primary", size="lg")
-            # 모든 UI 요소가 정의된 후 이벤트 핸들러 설정
-            genre_preset.change(
-                fn=update_tags_from_preset,
-                inputs=[genre_preset],
-                outputs=[prompt]
-            )
-            quality_preset.change(
-                fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
-                inputs=[quality_preset],
-                outputs=[preset_description]
-            )
-            quality_preset.change(
-                fn=update_quality_preset,
-                inputs=[quality_preset],
-                outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
-            )
-        with gr.Column():
             outputs, input_params_json = create_output_ui()
-            # 실시간 프리뷰 기능
-            def generate_preview(prompt, lyrics, genre_preset):
-                """10초 프리뷰 생성"""
-                preview_params = {
-                    "audio_duration": 10,
-                    "infer_step": 50,
-                    "guidance_scale": 12.0,
-                    "scheduler_type": "euler",
-                    "cfg_type": "apg",
-                    "omega_scale": 5.0,
-                }
-                enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
-                try:
-                    # 실제 구현에서는 빠른 생성 모드 사용
-                    result = enhanced_process_func(
-                        preview_params["audio_duration"],
-                        enhanced_prompt,
-                        lyrics[:200],  # 가사 일부만 사용
-                        preview_params["infer_step"],
-                        preview_params["guidance_scale"],
-                        preview_params["scheduler_type"],
-                        preview_params["cfg_type"],
-                        preview_params["omega_scale"],
-                        None,  # manual_seeds
-                        0.5,   # guidance_interval
-                        0.0,   # guidance_interval_decay
-                        3.0,   # min_guidance_scale
-                        True,  # use_erg_tag
-                        False, # use_erg_lyric
-                        True,  # use_erg_diffusion
-                        None,  # oss_steps
-                        0.0,   # guidance_scale_text
-                        0.0,   # guidance_scale_lyric
-                        multi_seed_mode="Single"
                     )
-                    return result[0] if result else None
-                except Exception as e:
-                    return f"프리뷰 생성 실패: {str(e)}"
-            preview_bnt.click(
-                fn=generate_preview,
-                inputs=[prompt, lyrics, genre_preset],
-                outputs=[outputs[0]]
-            )
-            with gr.Tab("retake"):
-                retake_variance = gr.Slider(
-                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
-                )
-                retake_seeds = gr.Textbox(
-                    label="retake seeds (default None)", placeholder="", value=None
-                )
-                retake_bnt = gr.Button("Retake", variant="primary")
-                retake_outputs, retake_input_params_json = create_output_ui("Retake")
-                def retake_process_func(json_data, retake_variance, retake_seeds):
-                    return enhanced_process_func(
-                        json_data.get("audio_duration", 30),
-                        json_data.get("prompt", ""),
-                        json_data.get("lyrics", ""),
-                        json_data.get("infer_step", 100),
-                        json_data.get("guidance_scale", 15.0),
-                        json_data.get("scheduler_type", "euler"),
-                        json_data.get("cfg_type", "apg"),
-                        json_data.get("omega_scale", 10.0),
-                        retake_seeds,
-                        json_data.get("guidance_interval", 0.5),
-                        json_data.get("guidance_interval_decay", 0.0),
-                        json_data.get("min_guidance_scale", 3.0),
-                        json_data.get("use_erg_tag", True),
-                        json_data.get("use_erg_lyric", False),
-                        json_data.get("use_erg_diffusion", True),
-                        json_data.get("oss_steps", None),
-                        json_data.get("guidance_scale_text", 0.0),
-                        json_data.get("guidance_scale_lyric", 0.0),
-                        audio2audio_enable=json_data.get("audio2audio_enable", False),
-                        ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
-                        ref_audio_input=json_data.get("ref_audio_input", None),
-                        lora_name_or_path=json_data.get("lora_name_or_path", "none"),
-                        multi_seed_mode="Best of 3",  # retake는 자동으로 다중 생성
-                        retake_variance=retake_variance,
-                        task="retake"
                     )
-                retake_bnt.click(
-                    fn=retake_process_func,
-                    inputs=[
-                        input_params_json,
-                        retake_variance,
-                        retake_seeds,
-                    ],
-                    outputs=retake_outputs + [retake_input_params_json],
-                )
-            with gr.Tab("repainting"):
-                retake_variance = gr.Slider(
-                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
-                )
-                retake_seeds = gr.Textbox(
-                    label="repaint seeds (default None)", placeholder="", value=None
-                )
-                repaint_start = gr.Slider(
-                    minimum=0.0,
-                    maximum=240.0,
-                    step=0.01,
-                    value=0.0,
-                    label="Repaint Start Time",
-                    interactive=True,
-                )
-                repaint_end = gr.Slider(
-                    minimum=0.0,
-                    maximum=240.0,
-                    step=0.01,
-                    value=30.0,
-                    label="Repaint End Time",
-                    interactive=True,
-                )
-                repaint_source = gr.Radio(
-                    ["text2music", "last_repaint", "upload"],
-                    value="text2music",
-                    label="Repaint Source",
-                    elem_id="repaint_source",
-                )
-                repaint_source_audio_upload = gr.Audio(
-                    label="Upload Audio",
-                    type="filepath",
-                    visible=False,
-                    elem_id="repaint_source_audio_upload",
-                    show_download_button=True,
-                )
-                repaint_source.change(
-                    fn=lambda x: gr.update(
-                        visible=x == "upload", elem_id="repaint_source_audio_upload"
-                    ),
-                    inputs=[repaint_source],
-                    outputs=[repaint_source_audio_upload],
-                )
-                repaint_bnt = gr.Button("Repaint", variant="primary")
-                repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
-                def repaint_process_func(
-                    text2music_json_data,
-                    repaint_json_data,
-                    retake_variance,
-                    retake_seeds,
-                    repaint_start,
-                    repaint_end,
-                    repaint_source,
-                    repaint_source_audio_upload,
-                    prompt,
-                    lyrics,
-                    infer_step,
-                    guidance_scale,
-                    scheduler_type,
-                    cfg_type,
-                    omega_scale,
-                    manual_seeds,
-                    guidance_interval,
-                    guidance_interval_decay,
-                    min_guidance_scale,
-                    use_erg_tag,
-                    use_erg_lyric,
-                    use_erg_diffusion,
-                    oss_steps,
-                    guidance_scale_text,
-                    guidance_scale_lyric,
-                ):
-                    if repaint_source == "upload":
-                        src_audio_path = repaint_source_audio_upload
-                        audio_duration = librosa.get_duration(filename=src_audio_path)
-                        json_data = {"audio_duration": audio_duration}
-                    elif repaint_source == "text2music":
-                        json_data = text2music_json_data
-                        src_audio_path = json_data["audio_path"]
-                    elif repaint_source == "last_repaint":
-                        json_data = repaint_json_data
-                        src_audio_path = json_data["audio_path"]
-                    return enhanced_process_func(
-                        json_data["audio_duration"],
-                        prompt,
-                        lyrics,
-                        infer_step,
-                        guidance_scale,
-                        scheduler_type,
-                        cfg_type,
-                        omega_scale,
-                        manual_seeds,
-                        guidance_interval,
-                        guidance_interval_decay,
-                        min_guidance_scale,
-                        use_erg_tag,
-                        use_erg_lyric,
-                        use_erg_diffusion,
-                        oss_steps,
-                        guidance_scale_text,
-                        guidance_scale_lyric,
-                        retake_seeds=retake_seeds,
-                        retake_variance=retake_variance,
-                        task="repaint",
-                        repaint_start=repaint_start,
-                        repaint_end=repaint_end,
-                        src_audio_path=src_audio_path,
-                        lora_name_or_path="none"
                     )
-                repaint_bnt.click(
-                    fn=repaint_process_func,
-                    inputs=[
-                        input_params_json,
-                        repaint_input_params_json,
                         retake_variance,
                         retake_seeds,
                         repaint_start,
@@ -925,157 +675,150 @@ def create_text2music_ui(
                         oss_steps,
                         guidance_scale_text,
                         guidance_scale_lyric,
-                    ],
-                    outputs=repaint_outputs + [repaint_input_params_json],
-                )
-            with gr.Tab("edit"):
-                edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
-                edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
-                retake_seeds = gr.Textbox(
-                    label="edit seeds (default None)", placeholder="", value=None
-                )
-                edit_type = gr.Radio(
-                    ["only_lyrics", "remix"],
-                    value="only_lyrics",
-                    label="Edit Type",
-                    elem_id="edit_type",
-                    info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
-                )
-                edit_n_min = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=0.6,
-                    label="edit_n_min",
-                    interactive=True,
-                )
-                edit_n_max = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=1.0,
-                    label="edit_n_max",
-                    interactive=True,
-                )
-                def edit_type_change_func(edit_type):
-                    if edit_type == "only_lyrics":
-                        n_min = 0.6
-                        n_max = 1.0
-                    elif edit_type == "remix":
-                        n_min = 0.2
-                        n_max = 0.4
-                    return n_min, n_max
-                edit_type.change(
-                    edit_type_change_func,
-                    inputs=[edit_type],
-                    outputs=[edit_n_min, edit_n_max],
-                )
-                edit_source = gr.Radio(
-                    ["text2music", "last_edit", "upload"],
-                    value="text2music",
-                    label="Edit Source",
-                    elem_id="edit_source",
-                )
-                edit_source_audio_upload = gr.Audio(
-                    label="Upload Audio",
-                    type="filepath",
-                    visible=False,
-                    elem_id="edit_source_audio_upload",
-                    show_download_button=True,
-                )
-                edit_source.change(
-                    fn=lambda x: gr.update(
-                        visible=x == "upload", elem_id="edit_source_audio_upload"
-                    ),
-                    inputs=[edit_source],
-                    outputs=[edit_source_audio_upload],
-                )
-                edit_bnt = gr.Button("Edit", variant="primary")
-                edit_outputs, edit_input_params_json = create_output_ui("Edit")
-                def edit_process_func(
-                    text2music_json_data,
-                    edit_input_params_json,
-                    edit_source,
-                    edit_source_audio_upload,
-                    prompt,
-                    lyrics,
-                    edit_prompt,
-                    edit_lyrics,
-                    edit_n_min,
-                    edit_n_max,
-                    infer_step,
-                    guidance_scale,
-                    scheduler_type,
-                    cfg_type,
-                    omega_scale,
-                    manual_seeds,
-                    guidance_interval,
-                    guidance_interval_decay,
-                    min_guidance_scale,
-                    use_erg_tag,
-                    use_erg_lyric,
-                    use_erg_diffusion,
-                    oss_steps,
-                    guidance_scale_text,
-                    guidance_scale_lyric,
-                    retake_seeds,
-                ):
-                    if edit_source == "upload":
-                        src_audio_path = edit_source_audio_upload
-                        audio_duration = librosa.get_duration(filename=src_audio_path)
-                        json_data = {"audio_duration": audio_duration}
-                    elif edit_source == "text2music":
-                        json_data = text2music_json_data
-                        src_audio_path = json_data["audio_path"]
-                    elif edit_source == "last_edit":
-                        json_data = edit_input_params_json
-                        src_audio_path = json_data["audio_path"]
-                    if not edit_prompt:
-                        edit_prompt = prompt
-                    if not edit_lyrics:
-                        edit_lyrics = lyrics
-                    return enhanced_process_func(
-                        json_data["audio_duration"],
-                        prompt,
-                        lyrics,
-                        infer_step,
-                        guidance_scale,
-                        scheduler_type,
-                        cfg_type,
-                        omega_scale,
-                        manual_seeds,
-                        guidance_interval,
-                        guidance_interval_decay,
-                        min_guidance_scale,
-                        use_erg_tag,
-                        use_erg_lyric,
-                        use_erg_diffusion,
-                        oss_steps,
-                        guidance_scale_text,
-                        guidance_scale_lyric,
-                        task="edit",
-                        src_audio_path=src_audio_path,
-                        edit_target_prompt=edit_prompt,
-                        edit_target_lyrics=edit_lyrics,
-                        edit_n_min=edit_n_min,
-                        edit_n_max=edit_n_max,
-                        retake_seeds=retake_seeds,
-                        lora_name_or_path="none"
                     )
-                edit_bnt.click(
-                    fn=edit_process_func,
-                    inputs=[
-                        input_params_json,
                         edit_input_params_json,
                         edit_source,
                         edit_source_audio_upload,
@@ -1101,127 +844,132 @@ def create_text2music_ui(
                         guidance_scale_text,
                         guidance_scale_lyric,
                         retake_seeds,
-                    ],
-                    outputs=edit_outputs + [edit_input_params_json],
-                )
-            with gr.Tab("extend"):
-                extend_seeds = gr.Textbox(
-                    label="extend seeds (default None)", placeholder="", value=None
-                )
-                left_extend_length = gr.Slider(
-                    minimum=0.0,
-                    maximum=240.0,
-                    step=0.01,
-                    value=0.0,
-                    label="Left Extend Length",
-                    interactive=True,
-                )
-                right_extend_length = gr.Slider(
-                    minimum=0.0,
-                    maximum=240.0,
-                    step=0.01,
-                    value=30.0,
-                    label="Right Extend Length",
-                    interactive=True,
-                )
-                extend_source = gr.Radio(
-                    ["text2music", "last_extend", "upload"],
-                    value="text2music",
-                    label="Extend Source",
-                    elem_id="extend_source",
-                )
-                extend_source_audio_upload = gr.Audio(
-                    label="Upload Audio",
-                    type="filepath",
-                    visible=False,
-                    elem_id="extend_source_audio_upload",
-                    show_download_button=True,
-                )
-                extend_source.change(
-                    fn=lambda x: gr.update(
-                        visible=x == "upload", elem_id="extend_source_audio_upload"
-                    ),
-                    inputs=[extend_source],
-                    outputs=[extend_source_audio_upload],
-                )
-                extend_bnt = gr.Button("Extend", variant="primary")
-                extend_outputs, extend_input_params_json = create_output_ui("Extend")
-                def extend_process_func(
-                    text2music_json_data,
-                    extend_input_params_json,
-                    extend_seeds,
-                    left_extend_length,
-                    right_extend_length,
-                    extend_source,
-                    extend_source_audio_upload,
-                    prompt,
-                    lyrics,
-                    infer_step,
-                    guidance_scale,
-                    scheduler_type,
-                    cfg_type,
-                    omega_scale,
-                    manual_seeds,
-                    guidance_interval,
-                    guidance_interval_decay,
-                    min_guidance_scale,
-                    use_erg_tag,
-                    use_erg_lyric,
-                    use_erg_diffusion,
-                    oss_steps,
-                    guidance_scale_text,
-                    guidance_scale_lyric,
-                ):
-                    if extend_source == "upload":
-                        src_audio_path = extend_source_audio_upload
-                        # get audio duration
-                        audio_duration = librosa.get_duration(filename=src_audio_path)
-                        json_data = {"audio_duration": audio_duration}
-                    elif extend_source == "text2music":
-                        json_data = text2music_json_data
-                        src_audio_path = json_data["audio_path"]
-                    elif extend_source == "last_extend":
-                        json_data = extend_input_params_json
-                        src_audio_path = json_data["audio_path"]
-                    repaint_start = -left_extend_length
-                    repaint_end = json_data["audio_duration"] + right_extend_length
-                    return enhanced_process_func(
-                        json_data["audio_duration"],
-                        prompt,
-                        lyrics,
-                        infer_step,
-                        guidance_scale,
-                        scheduler_type,
-                        cfg_type,
-                        omega_scale,
-                        manual_seeds,
-                        guidance_interval,
-                        guidance_interval_decay,
-                        min_guidance_scale,
-                        use_erg_tag,
-                        use_erg_lyric,
-                        use_erg_diffusion,
-                        oss_steps,
-                        guidance_scale_text,
-                        guidance_scale_lyric,
-                        retake_seeds=extend_seeds,
-                        retake_variance=1.0,
-                        task="extend",
-                        repaint_start=repaint_start,
-                        repaint_end=repaint_end,
-                        src_audio_path=src_audio_path,
-                        lora_name_or_path="none"
                     )
-                extend_bnt.click(
-                    fn=extend_process_func,
-                    inputs=[
-                        input_params_json,
                         extend_input_params_json,
                         extend_seeds,
                         left_extend_length,
@@ -1245,92 +993,245 @@ def create_text2music_ui(
                         oss_steps,
                         guidance_scale_text,
                         guidance_scale_lyric,
-                    ],
-                    outputs=extend_outputs + [extend_input_params_json],
-                )
-        def json2output(json_data):
-            return (
-                json_data["audio_duration"],
-                json_data["prompt"],
-                json_data["lyrics"],
-                json_data["infer_step"],
-                json_data["guidance_scale"],
-                json_data["scheduler_type"],
-                json_data["cfg_type"],
-                json_data["omega_scale"],
-                ", ".join(map(str, json_data["actual_seeds"])),
-                json_data["guidance_interval"],
-                json_data["guidance_interval_decay"],
-                json_data["min_guidance_scale"],
-                json_data["use_erg_tag"],
-                json_data["use_erg_lyric"],
-                json_data["use_erg_diffusion"],
-                ", ".join(map(str, json_data["oss_steps"])),
-                (
-                    json_data["guidance_scale_text"]
-                    if "guidance_scale_text" in json_data
-                    else 0.0
-                ),
-                (
-                    json_data["guidance_scale_lyric"]
-                    if "guidance_scale_lyric" in json_data
-                    else 0.0
-                ),
-                (
-                    json_data["audio2audio_enable"]
-                    if "audio2audio_enable" in json_data
-                    else False
-                ),
-                (
-                    json_data["ref_audio_strength"]
-                    if "ref_audio_strength" in json_data
-                    else 0.5
-                ),
-                (
-                    json_data["ref_audio_input"]
-                    if "ref_audio_input" in json_data
-                    else None
-                ),
             )
-        def sample_data(lora_name_or_path_):
-            if sample_data_func:
-                # sample_data_func는 인자를 받지 않는 메서드이므로 인자 없이 호출
-                json_data = sample_data_func()  # lora_name_or_path_ 인자 제거
-                return json2output(json_data)
-            return {}
-        sample_bnt.click(
-            sample_data,
-            inputs=[lora_name_or_path],
-            outputs=[
-                audio_duration,
-                prompt,
-                lyrics,
-                infer_step,
-                guidance_scale,
-                scheduler_type,
-                cfg_type,
-                omega_scale,
-                manual_seeds,
-                guidance_interval,
-                guidance_interval_decay,
-                min_guidance_scale,
-                use_erg_tag,
-                use_erg_lyric,
-                use_erg_diffusion,
-                oss_steps,
-                guidance_scale_text,
-                guidance_scale_lyric,
-                audio2audio_enable,
-                ref_audio_strength,
-                ref_audio_input,
-            ],
         )
-    # 메인 생성 버튼 이벤트 (향상된 함수 사용)
     text2music_bnt.click(
         fn=enhanced_process_func,
         inputs=[
@@ -1370,57 +1271,146 @@ def create_main_demo_ui(
     load_data_func=dump_func,
 ):
     with gr.Blocks(
-        title="ACE-Step Model 1.0 DEMO - Enhanced",
-        theme=gr.themes.Soft(),
         css="""
         .gradio-container {
-            max-width: 1200px !important;
         }
-        .quality-info {
-            background: linear-gradient(45deg, #f0f8ff, #e6f3ff);
-            padding: 10px;
-            border-radius: 8px;
-            margin: 5px 0;
         }
         """
     ) as demo:
         gr.Markdown(
             """
-            <h1 style="text-align: center;">🎵 ACE-Step PRO</h1>
-            <div style="text-align: center; margin: 20px;">
-                <p><strong>🚀 새로운 기능:</strong> 품질 프리셋 | 다중 생성 | 스마트 프롬프트 | 실시간 프리뷰 | 품질 점수</p>
-                <p>
-                    <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
-                    <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
-                    <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
                 </p>
             </div>
-        """
         )
-        # 사용법 가이드 추가
-        with gr.Accordion("📖 사용법 가이드", open=False):
-            gr.Markdown("""
-            ### 🎯 빠른 시작
-            1. **장르 선택**: 원하는 음악 장르를 선택하면 자동으로 최적화된 태그가 적용됩니다
-            2. **품질 설정**: Draft(빠름) → Standard(권장) → High Quality → Ultra 중 선택
-            3. **다중 생성**: "Best of 3/5/10" 선택하면 여러 번 생성하여 최고 품질을 자동 선택합니다
-            4. **프리뷰**: 전체 생성 전 10초 프리뷰로 빠르게 확인할 수 있습니다
-            ### 💡 품질 향상 팁
-            - **고품질 생성**: "High Quality" + "Best of 5" 조합 추천
-            - **빠른 테스트**: "Draft" + "프리뷰" 기능 활용
-            - **장르 특화**: 장르 프리셋 선택 후 "스마트 향상" 체크
-            - **가사 구조**: [verse], [chorus], [bridge] 태그 적극 활용
-            """)
-        with gr.Tab("🎵 Enhanced Text2Music"):
             create_text2music_ui(
                 gr=gr,
                 text2music_process_func=text2music_process_func,
                 sample_data_func=sample_data_func,
                 load_data_func=load_data_func,
             )
     return demo
@@ -1430,4 +1420,4 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         share=True  # 공유 링크 생성
-    )

 import gradio as gr
 import librosa
 import os
     "High Quality": {
         "infer_step": 200,
         "guidance_scale": 18.0,
+        "scheduler_type": "euler",
         "omega_scale": 15.0,
         "use_erg_diffusion": True,
         "use_erg_tag": True,
     "Ultra (Best)": {
         "infer_step": 299,
         "guidance_scale": 20.0,
+        "scheduler_type": "euler",
         "omega_scale": 20.0,
         "use_erg_diffusion": True,
         "use_erg_tag": True,
     enhanced_process_func = create_enhanced_process_func(text2music_process_func)
     with gr.Row():
+        # 왼쪽 컬럼 - 입력 설정
+        with gr.Column(scale=5):
+            # 상단 메인 컨트롤
             with gr.Group():
+                gr.Markdown("## 🎯 Quick Settings")
+                with gr.Row():
+                    with gr.Column(scale=2):
                         genre_preset = gr.Dropdown(
                             choices=["Custom"] + list(GENRE_PRESETS.keys()),
                             value="Custom",
+                            label="🎵 장르 프리셋",
+                            interactive=True,
+                        )
+                    with gr.Column(scale=2):
+                        quality_preset = gr.Dropdown(
+                            choices=list(QUALITY_PRESETS.keys()),
+                            value="Standard",
+                            label="⚡ 품질 프리셋",
+                            interactive=True
+                        )
+                    with gr.Column(scale=1):
+                        audio_duration = gr.Slider(
+                            -1,
+                            240.0,
+                            step=1,
+                            value=-1,
+                            label="⏱️ 길이 (초)",
+                            info="-1 = 랜덤",
+                            interactive=True,
+                        )
+                with gr.Row():
+                    preset_description = gr.Textbox(
+                        value=QUALITY_PRESETS["Standard"]["description"],
+                        label="품질 설명",
+                        interactive=False,
+                        max_lines=1
+                    )
+            # 프롬프트 섹션
+            with gr.Group():
+                gr.Markdown("## 🎼 Music Prompt")
+                with gr.Row():
+                    with gr.Column(scale=3):
+                        prompt = gr.Textbox(
+                            lines=3,
+                            label="태그 (콤마로 구분)",
+                            value=TAG_DEFAULT,
+                            placeholder="장르, 악기, BPM, 분위기 등을 콤마로 구분하여 입력...",
+                            elem_id="prompt"
                         )
+                    with gr.Column(scale=1):
                         enable_smart_enhancement = gr.Checkbox(
+                            label="✨ 스마트 향상",
                             value=True,
+                            info="자동 태그 최적화"
+                        )
+                        multi_seed_mode = gr.Dropdown(
+                            choices=list(MULTI_SEED_OPTIONS.keys()),
+                            value="Single",
+                            label="🎲 다중 생성",
+                            info="여러 번 생성하여 최고 품질 선택"
                         )
+            # 가사 섹션
             with gr.Group():
+                gr.Markdown("## 📝 Lyrics")
                 with gr.Row():
+                    with gr.Column(scale=3):
+                        topic_for_lyrics = gr.Textbox(
+                            lines=1,
+                            label="가사 주제 (AI 자동 생성)",
+                            placeholder="예: 첫사랑의 설렘, 여름밤의 추억, 도시의 불빛...",
+                            elem_id="topic"
+                        )
+                    with gr.Column(scale=1):
+                        generate_lyrics_btn = gr.Button(
+                            "🤖 가사 생성",
+                            variant="secondary",
+                            size="sm"
+                        )
                 lyrics = gr.Textbox(
+                    lines=10,
+                    label="가사 입력",
                     value=LYRIC_DEFAULT,
+                    placeholder="[verse], [chorus], [bridge] 태그를 사용하여 구조화된 가사를 입력하세요...",
+                    elem_id="lyrics"
                 )
+            # audio2audio 옵션 (더 간결하게)
+            with gr.Accordion("🎵 Audio2Audio 설정", open=False):
+                audio2audio_enable = gr.Checkbox(
+                    label="Audio2Audio 활성화",
+                    value=False,
+                    info="참조 오디오를 사용한 생성"
                 )
+                lora_name_or_path = gr.Dropdown(
+                    label="LoRA 모델",
+                    choices=["none", "ACE-Step/ACE-Step-v1-chinese-rap-LoRA"],
+                    value="none",
+                    allow_custom_value=True,
                 )
+                ref_audio_input = gr.Audio(
+                    type="filepath",
+                    label="참조 오디오",
+                    visible=False
                 )
+                ref_audio_strength = gr.Slider(
+                    label="참조 강도",
                     minimum=0.0,
                     maximum=1.0,
                     step=0.01,
                     value=0.5,
+                    visible=False
                 )
+            # 고급 설정 (접혀있음)
+            with gr.Accordion("⚙️ 고급 설정", open=False):
+                with gr.Row():
+                    with gr.Column():
+                        infer_step = gr.Slider(1, 300, 150, 1, label="추론 스텝")
+                        guidance_scale = gr.Slider(0.0, 30.0, 15.0, 0.1, label="가이던스 스케일")
+                        manual_seeds = gr.Textbox(label="시드값", placeholder="1,2,3,4", value=None)
+                    with gr.Column():
+                        scheduler_type = gr.Radio(["euler", "heun"], value="euler", label="스케줄러")
+                        cfg_type = gr.Radio(["cfg", "apg", "cfg_star"], value="apg", label="CFG 타입")
+                        omega_scale = gr.Slider(-100.0, 100.0, 10.0, 0.1, label="그래뉼러리티 스케일")
+                with gr.Row():
+                    with gr.Column():
+                        use_erg_tag = gr.Checkbox(label="ERG for tag", value=True)
+                        use_erg_lyric = gr.Checkbox(label="ERG for lyric", value=False)
+                        use_erg_diffusion = gr.Checkbox(label="ERG for diffusion", value=True)
+                    with gr.Column():
+                        guidance_interval = gr.Slider(0.0, 1.0, 0.5, 0.01, label="가이던스 인터벌")
+                        guidance_interval_decay = gr.Slider(0.0, 1.0, 0.0, 0.01, label="가이던스 감쇠")
+                        min_guidance_scale = gr.Slider(0.0, 200.0, 3.0, 0.1, label="최소 가이던스")
+                with gr.Row():
+                    guidance_scale_text = gr.Slider(0.0, 10.0, 0.0, 0.1, label="텍스트 가이던스")
+                    guidance_scale_lyric = gr.Slider(0.0, 10.0, 0.0, 0.1, label="가사 가이던스")
+                    oss_steps = gr.Textbox(label="OSS Steps", placeholder="16, 29, 52...", value=None)
+            # 생성 버튼들
+            with gr.Row():
+                sample_bnt = gr.Button("🎲 샘플", variant="secondary", scale=1)
+                preview_bnt = gr.Button("👁️ 미리듣기 (10초)", variant="secondary", scale=2)
+                text2music_bnt = gr.Button("🎵 음악 생성", variant="primary", scale=3, size="lg")
+        # 오른쪽 컬럼 - 출력
+        with gr.Column(scale=5):
+            gr.Markdown("## 🎧 Generated Music")
             outputs, input_params_json = create_output_ui()
+            # 숨겨진 탭들 (visible=False)
+            with gr.Tabs(visible=False):
+                with gr.Tab("retake", visible=False):
+                    retake_variance = gr.Slider(
+                        minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
                     )
+                    retake_seeds = gr.Textbox(
+                        label="retake seeds (default None)", placeholder="", value=None
                     )
+                    retake_bnt = gr.Button("Retake", variant="primary")
+                    retake_outputs, retake_input_params_json = create_output_ui("Retake")
+                    def retake_process_func(json_data, retake_variance, retake_seeds):
+                        """Call enhanced_process_func with task="retake".
+
+                        Generation settings are read from ``json_data`` via ``.get``, so a
+                        missing key falls back to the literal default written on that line.
+                        ``retake_variance`` is forwarded unchanged as a keyword argument.
+                        """
+                        # NOTE(review): retake_seeds is passed in the positional slot where
+                        # the repaint/edit/extend calls pass manual_seeds, and no
+                        # retake_seeds= keyword is sent from here -- confirm this is intended.
+                        return enhanced_process_func(
+                            json_data.get("audio_duration", 30),
+                            json_data.get("prompt", ""),
+                            json_data.get("lyrics", ""),
+                            json_data.get("infer_step", 100),
+                            json_data.get("guidance_scale", 15.0),
+                            json_data.get("scheduler_type", "euler"),
+                            json_data.get("cfg_type", "apg"),
+                            json_data.get("omega_scale", 10.0),
+                            retake_seeds,
+                            json_data.get("guidance_interval", 0.5),
+                            json_data.get("guidance_interval_decay", 0.0),
+                            json_data.get("min_guidance_scale", 3.0),
+                            json_data.get("use_erg_tag", True),
+                            json_data.get("use_erg_lyric", False),
+                            json_data.get("use_erg_diffusion", True),
+                            json_data.get("oss_steps", None),
+                            json_data.get("guidance_scale_text", 0.0),
+                            json_data.get("guidance_scale_lyric", 0.0),
+                            audio2audio_enable=json_data.get("audio2audio_enable", False),
+                            ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
+                            ref_audio_input=json_data.get("ref_audio_input", None),
+                            lora_name_or_path=json_data.get("lora_name_or_path", "none"),
+                            # Hard-coded; the multi_seed_mode dropdown value is not consulted here.
+                            multi_seed_mode="Best of 3",
+                            retake_variance=retake_variance,
+                            task="retake"
+                        )
+                    retake_bnt.click(
+                        fn=retake_process_func,
+                        inputs=[
+                            input_params_json,
+                            retake_variance,
+                            retake_seeds,
+                        ],
+                        outputs=retake_outputs + [retake_input_params_json],
+                    )
+                with gr.Tab("repainting", visible=False):
+                    retake_variance = gr.Slider(
+                        minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
+                    )
+                    retake_seeds = gr.Textbox(
+                        label="repaint seeds (default None)", placeholder="", value=None
+                    )
+                    repaint_start = gr.Slider(
+                        minimum=0.0,
+                        maximum=240.0,
+                        step=0.01,
+                        value=0.0,
+                        label="Repaint Start Time",
+                        interactive=True,
+                    )
+                    repaint_end = gr.Slider(
+                        minimum=0.0,
+                        maximum=240.0,
+                        step=0.01,
+                        value=30.0,
+                        label="Repaint End Time",
+                        interactive=True,
+                    )
+                    repaint_source = gr.Radio(
+                        ["text2music", "last_repaint", "upload"],
+                        value="text2music",
+                        label="Repaint Source",
+                        elem_id="repaint_source",
+                    )
+                    repaint_source_audio_upload = gr.Audio(
+                        label="Upload Audio",
+                        type="filepath",
+                        visible=False,
+                        elem_id="repaint_source_audio_upload",
+                        show_download_button=True,
+                    )
+                    repaint_source.change(
+                        fn=lambda x: gr.update(
+                            visible=x == "upload", elem_id="repaint_source_audio_upload"
+                        ),
+                        inputs=[repaint_source],
+                        outputs=[repaint_source_audio_upload],
                     )
+                    repaint_bnt = gr.Button("Repaint", variant="primary")
+                    repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
+                    def repaint_process_func(
+                        text2music_json_data,
+                        repaint_json_data,
                         retake_variance,
                         retake_seeds,
                         repaint_start,
                         oss_steps,
                         guidance_scale_text,
                         guidance_scale_lyric,
+                    ):
+                        """Call enhanced_process_func with task="repaint" on the chosen source.
+
+                        ``repaint_source`` selects where the audio path and duration come
+                        from: "upload" uses the uploaded file, "text2music" uses
+                        ``text2music_json_data`` and "last_repaint" uses ``repaint_json_data``.
+                        ``lora_name_or_path`` is always sent as "none".
+                        """
+                        if repaint_source == "upload":
+                            src_audio_path = repaint_source_audio_upload
+                            # The duration is measured from the uploaded file itself.
+                            # NOTE(review): newer librosa releases deprecate filename= in
+                            # favour of path= -- confirm against the pinned librosa version.
+                            audio_duration = librosa.get_duration(filename=src_audio_path)
+                            # Only "audio_duration" is available for an upload.
+                            json_data = {"audio_duration": audio_duration}
+                        elif repaint_source == "text2music":
+                            json_data = text2music_json_data
+                            src_audio_path = json_data["audio_path"]
+                        elif repaint_source == "last_repaint":
+                            json_data = repaint_json_data
+                            src_audio_path = json_data["audio_path"]
+                        # NOTE(review): there is no else branch, so any other repaint_source
+                        # value leaves json_data and src_audio_path unbound at this point.
+                        return enhanced_process_func(
+                            json_data["audio_duration"],
+                            prompt,
+                            lyrics,
+                            infer_step,
+                            guidance_scale,
+                            scheduler_type,
+                            cfg_type,
+                            omega_scale,
+                            manual_seeds,
+                            guidance_interval,
+                            guidance_interval_decay,
+                            min_guidance_scale,
+                            use_erg_tag,
+                            use_erg_lyric,
+                            use_erg_diffusion,
+                            oss_steps,
+                            guidance_scale_text,
+                            guidance_scale_lyric,
+                            retake_seeds=retake_seeds,
+                            retake_variance=retake_variance,
+                            task="repaint",
+                            repaint_start=repaint_start,
+                            repaint_end=repaint_end,
+                            src_audio_path=src_audio_path,
+                            lora_name_or_path="none"
+                        )
+                    repaint_bnt.click(
+                        fn=repaint_process_func,
+                        inputs=[
+                            input_params_json,
+                            repaint_input_params_json,
+                            retake_variance,
+                            retake_seeds,
+                            repaint_start,
+                            repaint_end,
+                            repaint_source,
+                            repaint_source_audio_upload,
+                            prompt,
+                            lyrics,
+                            infer_step,
+                            guidance_scale,
+                            scheduler_type,
+                            cfg_type,
+                            omega_scale,
+                            manual_seeds,
+                            guidance_interval,
+                            guidance_interval_decay,
+                            min_guidance_scale,
+                            use_erg_tag,
+                            use_erg_lyric,
+                            use_erg_diffusion,
+                            oss_steps,
+                            guidance_scale_text,
+                            guidance_scale_lyric,
+                        ],
+                        outputs=repaint_outputs + [repaint_input_params_json],
+                    )
+                with gr.Tab("edit", visible=False):
+                    edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
+                    edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
+                    retake_seeds = gr.Textbox(
+                        label="edit seeds (default None)", placeholder="", value=None
+                    )
+                    edit_type = gr.Radio(
+                        ["only_lyrics", "remix"],
+                        value="only_lyrics",
+                        label="Edit Type",
+                        elem_id="edit_type",
+                        info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
+                    )
+                    edit_n_min = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.01,
+                        value=0.6,
+                        label="edit_n_min",
+                        interactive=True,
+                    )
+                    edit_n_max = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.01,
+                        value=1.0,
+                        label="edit_n_max",
+                        interactive=True,
+                    )
+                    def edit_type_change_func(edit_type):
+                        """Return the (n_min, n_max) pair for the selected edit type.
+
+                        "only_lyrics" gives (0.6, 1.0) and "remix" gives (0.2, 0.4).
+                        """
+                        if edit_type == "only_lyrics":
+                            n_min = 0.6
+                            n_max = 1.0
+                        elif edit_type == "remix":
+                            n_min = 0.2
+                            n_max = 0.4
+                        # NOTE(review): any other edit_type leaves n_min/n_max unassigned, so
+                        # the return below would raise UnboundLocalError.
+                        return n_min, n_max
+                    edit_type.change(
+                        edit_type_change_func,
+                        inputs=[edit_type],
+                        outputs=[edit_n_min, edit_n_max],
+                    )
+                    edit_source = gr.Radio(
+                        ["text2music", "last_edit", "upload"],
+                        value="text2music",
+                        label="Edit Source",
+                        elem_id="edit_source",
+                    )
+                    edit_source_audio_upload = gr.Audio(
+                        label="Upload Audio",
+                        type="filepath",
+                        visible=False,
+                        elem_id="edit_source_audio_upload",
+                        show_download_button=True,
+                    )
+                    edit_source.change(
+                        fn=lambda x: gr.update(
+                            visible=x == "upload", elem_id="edit_source_audio_upload"
+                        ),
+                        inputs=[edit_source],
+                        outputs=[edit_source_audio_upload],
                     )
+                    edit_bnt = gr.Button("Edit", variant="primary")
+                    edit_outputs, edit_input_params_json = create_output_ui("Edit")
+                    def edit_process_func(
+                        text2music_json_data,
                         edit_input_params_json,
                         edit_source,
                         edit_source_audio_upload,
                         guidance_scale_text,
                         guidance_scale_lyric,
                         retake_seeds,
+                    ):
+                        """Call enhanced_process_func with task="edit" on the chosen source.
+
+                        ``edit_source`` selects where the audio path and duration come from:
+                        "upload" uses the uploaded file, "text2music" uses
+                        ``text2music_json_data`` and "last_edit" uses
+                        ``edit_input_params_json``. The original ``prompt`` and ``lyrics`` are
+                        passed positionally; the edit targets go in ``edit_target_prompt`` and
+                        ``edit_target_lyrics``. ``lora_name_or_path`` is always sent as "none".
+                        """
+                        if edit_source == "upload":
+                            src_audio_path = edit_source_audio_upload
+                            # The duration is measured from the uploaded file itself.
+                            # NOTE(review): newer librosa releases deprecate filename= in
+                            # favour of path= -- confirm against the pinned librosa version.
+                            audio_duration = librosa.get_duration(filename=src_audio_path)
+                            # Only "audio_duration" is available for an upload.
+                            json_data = {"audio_duration": audio_duration}
+                        elif edit_source == "text2music":
+                            json_data = text2music_json_data
+                            src_audio_path = json_data["audio_path"]
+                        elif edit_source == "last_edit":
+                            json_data = edit_input_params_json
+                            src_audio_path = json_data["audio_path"]
+                        # NOTE(review): there is no else branch, so any other edit_source value
+                        # leaves json_data and src_audio_path unbound at this point.
+                        # An empty edit field falls back to the original prompt / lyrics.
+                        if not edit_prompt:
+                            edit_prompt = prompt
+                        if not edit_lyrics:
+                            edit_lyrics = lyrics
+                        return enhanced_process_func(
+                            json_data["audio_duration"],
+                            prompt,
+                            lyrics,
+                            infer_step,
+                            guidance_scale,
+                            scheduler_type,
+                            cfg_type,
+                            omega_scale,
+                            manual_seeds,
+                            guidance_interval,
+                            guidance_interval_decay,
+                            min_guidance_scale,
+                            use_erg_tag,
+                            use_erg_lyric,
+                            use_erg_diffusion,
+                            oss_steps,
+                            guidance_scale_text,
+                            guidance_scale_lyric,
+                            task="edit",
+                            src_audio_path=src_audio_path,
+                            edit_target_prompt=edit_prompt,
+                            edit_target_lyrics=edit_lyrics,
+                            edit_n_min=edit_n_min,
+                            edit_n_max=edit_n_max,
+                            retake_seeds=retake_seeds,
+                            lora_name_or_path="none"
+                        )
+                    edit_bnt.click(
+                        fn=edit_process_func,
+                        inputs=[
+                            input_params_json,
+                            edit_input_params_json,
+                            edit_source,
+                            edit_source_audio_upload,
+                            prompt,
+                            lyrics,
+                            edit_prompt,
+                            edit_lyrics,
+                            edit_n_min,
+                            edit_n_max,
+                            infer_step,
+                            guidance_scale,
+                            scheduler_type,
+                            cfg_type,
+                            omega_scale,
+                            manual_seeds,
+                            guidance_interval,
+                            guidance_interval_decay,
+                            min_guidance_scale,
+                            use_erg_tag,
+                            use_erg_lyric,
+                            use_erg_diffusion,
+                            oss_steps,
+                            guidance_scale_text,
+                            guidance_scale_lyric,
+                            retake_seeds,
+                        ],
+                        outputs=edit_outputs + [edit_input_params_json],
+                    )
+                with gr.Tab("extend", visible=False):
+                    extend_seeds = gr.Textbox(
+                        label="extend seeds (default None)", placeholder="", value=None
+                    )
+                    left_extend_length = gr.Slider(
+                        minimum=0.0,
+                        maximum=240.0,
+                        step=0.01,
+                        value=0.0,
+                        label="Left Extend Length",
+                        interactive=True,
+                    )
+                    right_extend_length = gr.Slider(
+                        minimum=0.0,
+                        maximum=240.0,
+                        step=0.01,
+                        value=30.0,
+                        label="Right Extend Length",
+                        interactive=True,
+                    )
+                    extend_source = gr.Radio(
+                        ["text2music", "last_extend", "upload"],
+                        value="text2music",
+                        label="Extend Source",
+                        elem_id="extend_source",
+                    )
+                    extend_source_audio_upload = gr.Audio(
+                        label="Upload Audio",
+                        type="filepath",
+                        visible=False,
+                        elem_id="extend_source_audio_upload",
+                        show_download_button=True,
                     )
+                    extend_source.change(
+                        fn=lambda x: gr.update(
+                            visible=x == "upload", elem_id="extend_source_audio_upload"
+                        ),
+                        inputs=[extend_source],
+                        outputs=[extend_source_audio_upload],
+                    )
+                    extend_bnt = gr.Button("Extend", variant="primary")
+                    extend_outputs, extend_input_params_json = create_output_ui("Extend")
+                    def extend_process_func(
+                        text2music_json_data,
                         extend_input_params_json,
                         extend_seeds,
                         left_extend_length,
                         oss_steps,
                         guidance_scale_text,
                         guidance_scale_lyric,
+                    ):
+                        """Call enhanced_process_func with task="extend" on the chosen source.
+
+                        ``extend_source`` selects where the audio path and duration come from:
+                        "upload" uses the uploaded file, "text2music" uses
+                        ``text2music_json_data`` and "last_extend" uses
+                        ``extend_input_params_json``. ``extend_seeds`` is forwarded as
+                        ``retake_seeds``, ``retake_variance`` is fixed at 1.0 and
+                        ``lora_name_or_path`` is always sent as "none".
+                        """
+                        if extend_source == "upload":
+                            src_audio_path = extend_source_audio_upload
+                            # The duration is measured from the uploaded file itself.
+                            # NOTE(review): newer librosa releases deprecate filename= in
+                            # favour of path= -- confirm against the pinned librosa version.
+                            audio_duration = librosa.get_duration(filename=src_audio_path)
+                            # Only "audio_duration" is available for an upload.
+                            json_data = {"audio_duration": audio_duration}
+                        elif extend_source == "text2music":
+                            json_data = text2music_json_data
+                            src_audio_path = json_data["audio_path"]
+                        elif extend_source == "last_extend":
+                            json_data = extend_input_params_json
+                            src_audio_path = json_data["audio_path"]
+                        # NOTE(review): there is no else branch, so any other extend_source
+                        # value leaves json_data and src_audio_path unbound at this point.
+                        # The repaint window is widened past both ends of the source: the
+                        # start goes negative by left_extend_length and the end exceeds the
+                        # source duration by right_extend_length.
+                        repaint_start = -left_extend_length
+                        repaint_end = json_data["audio_duration"] + right_extend_length
+                        return enhanced_process_func(
+                            json_data["audio_duration"],
+                            prompt,
+                            lyrics,
+                            infer_step,
+                            guidance_scale,
+                            scheduler_type,
+                            cfg_type,
+                            omega_scale,
+                            manual_seeds,
+                            guidance_interval,
+                            guidance_interval_decay,
+                            min_guidance_scale,
+                            use_erg_tag,
+                            use_erg_lyric,
+                            use_erg_diffusion,
+                            oss_steps,
+                            guidance_scale_text,
+                            guidance_scale_lyric,
+                            retake_seeds=extend_seeds,
+                            retake_variance=1.0,
+                            task="extend",
+                            repaint_start=repaint_start,
+                            repaint_end=repaint_end,
+                            src_audio_path=src_audio_path,
+                            lora_name_or_path="none"
+                        )
+                    # Gradio passes the current values of `inputs` to `fn` as
+                    # positional arguments, so the order of these 24 entries must
+                    # match the parameter order of extend_process_func.
+                    extend_bnt.click(
+                        fn=extend_process_func,
+                        inputs=[
+                            input_params_json,
+                            extend_input_params_json,
+                            extend_seeds,
+                            left_extend_length,
+                            right_extend_length,
+                            extend_source,
+                            extend_source_audio_upload,
+                            prompt,
+                            lyrics,
+                            infer_step,
+                            guidance_scale,
+                            scheduler_type,
+                            cfg_type,
+                            omega_scale,
+                            manual_seeds,
+                            guidance_interval,
+                            guidance_interval_decay,
+                            min_guidance_scale,
+                            use_erg_tag,
+                            use_erg_lyric,
+                            use_erg_diffusion,
+                            oss_steps,
+                            guidance_scale_text,
+                            guidance_scale_lyric,
+                        ],
+                        # The results fill the extend output components followed by
+                        # the extend params JSON component.
+                        outputs=extend_outputs + [extend_input_params_json],
+                    )
+    # Event handlers
+    def toggle_ref_audio_visibility(is_checked):
+        """Return two visibility updates that both follow the checkbox state.
+
+        The pair is consumed by the ``audio2audio_enable.change`` wiring, whose
+        outputs are the reference-audio input and its strength control.
+        """
+        input_update = gr.update(visible=is_checked)
+        strength_update = gr.update(visible=is_checked)
+        return input_update, strength_update
+    # Show or hide the reference-audio widgets together with the checkbox.
+    audio2audio_enable.change(
+        fn=toggle_ref_audio_visibility,
+        inputs=[audio2audio_enable],
+        outputs=[ref_audio_input, ref_audio_strength],
+    )
+    # Selecting a genre preset writes the result of update_tags_from_preset
+    # into the prompt field.
+    genre_preset.change(
+        fn=update_tags_from_preset,
+        inputs=[genre_preset],
+        outputs=[prompt]
+    )
+    # Two listeners are registered on the quality preset: this one refreshes
+    # the description text (empty string for an unknown preset) ...
+    quality_preset.change(
+        fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
+        inputs=[quality_preset],
+        outputs=[preset_description]
+    )
+    # ... and this one pushes the preset's values into the generation controls.
+    quality_preset.change(
+        fn=update_quality_preset,
+        inputs=[quality_preset],
+        outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
+    )
+    # When the lyrics-generation button is clicked
+    generate_lyrics_btn.click(
+        fn=openai_generate_lyrics,
+        inputs=[topic_for_lyrics],
+        outputs=[lyrics]
+    )
+    # Preview feature
+    def generate_preview(prompt, lyrics, genre_preset):
+        """Generate a 10-second preview clip with fixed, fast settings.
+
+        Args:
+            prompt: Tag/prompt text; enhanced with the genre unless the preset
+                is "Custom".
+            lyrics: Lyrics text; only the first 200 characters are used. ``None``
+                is treated as empty lyrics.
+            genre_preset: Name of the selected genre preset.
+
+        Returns:
+            The first element of the generation result, or ``None`` when the
+            result is empty.
+
+        Raises:
+            gr.Error: if generation fails, so Gradio reports the failure in the
+                UI instead of receiving an error string as the output value.
+        """
+        preview_params = {
+            "audio_duration": 10,
+            "infer_step": 50,
+            "guidance_scale": 12.0,
+            "scheduler_type": "euler",
+            "cfg_type": "apg",
+            "omega_scale": 5.0,
+        }
+        enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
+        try:
+            # A real implementation would use a dedicated fast-generation mode.
+            result = enhanced_process_func(
+                preview_params["audio_duration"],
+                enhanced_prompt,
+                (lyrics or "")[:200],  # use only the beginning of the lyrics
+                preview_params["infer_step"],
+                preview_params["guidance_scale"],
+                preview_params["scheduler_type"],
+                preview_params["cfg_type"],
+                preview_params["omega_scale"],
+                None,  # manual_seeds
+                0.5,   # guidance_interval
+                0.0,   # guidance_interval_decay
+                3.0,   # min_guidance_scale
+                True,  # use_erg_tag
+                False, # use_erg_lyric
+                True,  # use_erg_diffusion
+                None,  # oss_steps
+                0.0,   # guidance_scale_text
+                0.0,   # guidance_scale_lyric
+                multi_seed_mode="Single"
             )
+            return result[0] if result else None
+        except Exception as e:
+            # The return value feeds an output component, so an error string
+            # would be taken as the component's value; raise a Gradio error
+            # so the message is shown to the user instead.
+            raise gr.Error(f"프리뷰 생성 실패: {str(e)}") from e
+    # The preview result is routed into the first output component only.
+    preview_bnt.click(
+        fn=generate_preview,
+        inputs=[prompt, lyrics, genre_preset],
+        outputs=[outputs[0]]
+    )
+    def json2output(json_data):
+        """Flatten a parameter dict into the positional tuple used by the UI.
+
+        Seeds and OSS steps are rendered as comma-separated strings. The
+        guidance text/lyric scales and the audio2audio fields fall back to
+        defaults when their key is absent; every other key is required and a
+        missing one raises ``KeyError``.
+        """
+        leading = [
+            json_data[key]
+            for key in (
+                "audio_duration",
+                "prompt",
+                "lyrics",
+                "infer_step",
+                "guidance_scale",
+                "scheduler_type",
+                "cfg_type",
+                "omega_scale",
+            )
+        ]
+        seeds_text = ", ".join(str(seed) for seed in json_data["actual_seeds"])
+        sampling = [
+            json_data[key]
+            for key in (
+                "guidance_interval",
+                "guidance_interval_decay",
+                "min_guidance_scale",
+                "use_erg_tag",
+                "use_erg_lyric",
+                "use_erg_diffusion",
+            )
+        ]
+        oss_text = ", ".join(str(step) for step in json_data["oss_steps"])
+        # Optional keys paired with the value used when the key is missing.
+        optional_defaults = (
+            ("guidance_scale_text", 0.0),
+            ("guidance_scale_lyric", 0.0),
+            ("audio2audio_enable", False),
+            ("ref_audio_strength", 0.5),
+            ("ref_audio_input", None),
+        )
+        trailing = [json_data.get(key, fallback) for key, fallback in optional_defaults]
+        return (*leading, seeds_text, *sampling, oss_text, *trailing)
+    def sample_data(lora_name_or_path_):
+        """Return sampled parameters as UI output values, or ``{}`` if unavailable.
+
+        ``lora_name_or_path_`` is accepted because the click wiring passes it,
+        but it is not used here.
+        """
+        if not sample_data_func:
+            return {}
+        return json2output(sample_data_func())
+    # The 21 outputs below are listed in the same order as the tuple returned
+    # by json2output, which sample_data forwards.
+    sample_bnt.click(
+        sample_data,
+        inputs=[lora_name_or_path],
+        outputs=[
+            audio_duration,
+            prompt,
+            lyrics,
+            infer_step,
+            guidance_scale,
+            scheduler_type,
+            cfg_type,
+            omega_scale,
+            manual_seeds,
+            guidance_interval,
+            guidance_interval_decay,
+            min_guidance_scale,
+            use_erg_tag,
+            use_erg_lyric,
+            use_erg_diffusion,
+            oss_steps,
+            guidance_scale_text,
+            guidance_scale_lyric,
+            audio2audio_enable,
+            ref_audio_strength,
+            ref_audio_input,
+        ],
+    )
+    # 메인 생성 버튼 이벤트
     text2music_bnt.click(
         fn=enhanced_process_func,
         inputs=[
     load_data_func=dump_func,
 ):
     with gr.Blocks(
+        title="ACE-Step Model 1.0 - Enhanced",
+        theme=gr.themes.Soft(
+            primary_hue="blue",
+            secondary_hue="gray",
+            font=["Helvetica", "ui-sans-serif", "system-ui", "sans-serif"],
+        ),
         css="""
         .gradio-container {
+            max-width: 1400px !important;
+            margin: auto !important;
         }
+        /* 그룹 스타일링 */
+        .gr-group {
+            border: 1px solid #e5e7eb !important;
+            border-radius: 8px !important;
+            padding: 16px !important;
+            margin-bottom: 16px !important;
+            background: white !important;
+        }
+        /* 헤더 스타일 */
+        h1 {
+            background: linear-gradient(45deg, #2563eb, #7c3aed);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            text-align: center;
+            font-size: 2.5rem !important;
+            margin-bottom: 0.5rem !important;
+        }
+        h2 {
+            color: #1f2937 !important;
+            font-size: 1.5rem !important;
+            margin-bottom: 1rem !important;
+            font-weight: 600 !important;
+        }
+        /* 버튼 스타일 */
+        .primary {
+            background: linear-gradient(45deg, #2563eb, #3b82f6) !important;
+            color: white !important;
+            font-weight: 600 !important;
+            transition: all 0.3s ease !important;
+        }
+        .primary:hover {
+            transform: translateY(-2px) !important;
+            box-shadow: 0 4px 12px rgba(59, 130, 246, 0.4) !important;
+        }
+        .secondary {
+            background: #f3f4f6 !important;
+            color: #374151 !important;
+            border: 1px solid #e5e7eb !important;
+        }
+        /* 입력 필드 스타일 */
+        input, textarea, .gr-box {
+            border-radius: 6px !important;
+        }
+        /* 아코디언 스타일 */
+        .gr-accordion {
+            border-radius: 8px !important;
+            overflow: hidden !important;
+        }
+        /* 태그 라벨 스타일 */
+        label {
+            font-weight: 500 !important;
+            color: #374151 !important;
+        }
+        /* 퀄리티 설명 박스 */
+        #component-preset_description textarea {
+            background: linear-gradient(45deg, #f0f9ff, #e0f2fe) !important;
+            border: none !important;
+            font-style: italic !important;
         }
         """
     ) as demo:
         gr.Markdown(
             """
+            <h1>🎵 ACE-Step PRO</h1>
+            <div style="text-align: center; margin: 20px 0 30px 0;">
+                <p style="color: #6b7280; font-size: 1.1rem;">
+                    <strong>🚀 Enhanced Features:</strong> AI 가사 생성 | 스마트 프롬프트 | 품질 프리셋 | 다중 생성 모드
                 </p>
             </div>
+            """
         )
+        # 메인 탭
+        with gr.Tab("🎵 Music Generation"):
             create_text2music_ui(
                 gr=gr,
                 text2music_process_func=text2music_process_func,
                 sample_data_func=sample_data_func,
                 load_data_func=load_data_func,
             )
+        # 가이드 탭 추가
+        with gr.Tab("📖 사용법 가이드"):
+            gr.Markdown("""
+            ## 🎯 빠른 시작 가이드
+            ### 1. 기본 사용법
+            - **장르 선택**: 원하는 음악 장르를 선택하면 자동으로 최적화된 태그가 적용됩니다
+            - **품질 설정**: 용도에 맞는 품질을 선택하세요
+              - Draft: 빠른 테스트 (1-2분)
+              - Standard: 일반 사용 (3-5분)
+              - High Quality: 고품질 (8-12분)
+              - Ultra: 최고 품질 (15-20분)
+            ### 2. AI 가사 생성
+            - 가사 주제를 입력하고 "🤖 가사 생성" 버튼을 클릭하면 AI가 자동으로 구조화된 가사를 생성합니다
+            - 생성된 가사는 자유롭게 수정 가능합니다
+            ### 3. 다중 생성 모드
+            - "Best of 3/5/10"을 선택하면 여러 번 생성하여 가장 좋은 품질의 결과를 자동으로 선택합니다
+            - 더 나은 결과를 원할 때 유용합니다
+            ### 4. 프리뷰 기능
+            - "👁️ 미리듣기" 버튼으로 10초 샘플을 빠르게 생성할 수 있습니다
+            - 전체 곡 생성 전에 스타일을 확인할 때 유용합니다
+            ### 💡 품질 향상 팁
+            1. **고품질 생성**: "High Quality" + "Best of 5" 조합 추천
+            2. **빠른 테스트**: "Draft" + "프리뷰" 기능 활용
+            3. **장르 특화**: 장르 프리셋 선택 후 "스마트 향상" 체크
+            4. **가사 구조**: [verse], [chorus], [bridge] 태그를 적극 활용하세요
+            ### 🎵 가사 구조 태그
+            - `[verse]`: 절 (이야기 전개)
+            - `[chorus]`: 후렴구 (반복되는 메인 멜로디)
+            - `[bridge]`: 브릿지 (전환부)
+            - `[instrumental]` or `[inst]`: 연주 구간
+            """)
     return demo
         server_name="0.0.0.0",
         server_port=7860,
         share=True  # 공유 링크 생성
+    )