# app.py (Final Version with Download Feature)
import gradio as gr
import google.generativeai as genai
import os
import time
from pydub import AudioSegment

# --- 0. Global Configuration ---
# API Key will be loaded from Hugging Face Secrets
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Hugging Face Secret 'GOOGLE_API_KEY' not found!")

genai.configure(api_key=GOOGLE_API_KEY)
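# Gemini model used to analyze the uploaded track's genre, mood, and key instruments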
gemini_model = genai.GenerativeModel('gemini-1.5-flash-latest')

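# 10-band graphic EQ: center frequencies in Hz; each band's gain is clamped to ±EQ_RANGE_DB dB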
EQ_BANDS_HZ = [60, 170, 310, 600, 1000, 3000, 6000, 12000, 14000, 16000]
EQ_RANGE_DB = 12

# --- 1. Bilingual Text Library (status messages updated) ---
LANG = {
    'zh': {
        'title': "FeiMatrix 智能动态调音", 'subtitle': "上传一首歌,告诉我你想要的感觉,剩下的交给我!", 'lang_label': "语言 / Language",
        'step1_header': "第一步:上传音频", 'upload_label': "点击或拖拽 MP3 文件到这里",
        'step2_header': "第二步:告诉我你的感觉", 'custom_input_label': "用你自己的话描述想要的感觉(推荐):",
        'custom_input_placeholder': "例如:我想要吉他声更清脆,鼓声更有力...", 'quick_choice_label': "或者,快速选择一个预设风格:",
        'quick_choices': [ "我想要咚咚咚的重低音!", "让高音更亮、更清楚", "让唱歌的声音更突出", "我喜欢温暖、厚实的感觉", "给我一种现场演唱会的空间感", "保持原汁原味,但细节多一点", "无 (使用上面的输入框)", ],
        'default_choice': "无 (使用上面的输入框)", 'step3_header': "第三步:开始调音", 'process_button': "开始智能调音!",
        'log_header': "AI 调音师工作日志", 'log_initial': "`[系统]`:我准备好啦,等你上传文件哦~", 'result_header': "第四步:聆听并下载您的定制版",
        'result_label': "这里会显示AI调音后的音频", 'accordion_header': "(高级)查看 AI 定制的 EQ 参数", 'err_no_file': "哎呀,忘了上传MP3文件啦!",
        'info_no_pref': "您没有指定风格,将为您进行温和的细节优化。", 'status_analyzing': "`[AI分析师]`:收到!正在分析您的音频... ⏳",
        'status_analysis_failed': "AI分析失败: {e}。将使用默认调音策略。", 'status_understanding': "`[AI分析师]`:分析完成!\n> {analysis}\n\n`[AI调音师]`:正在理解{choice}并调整EQ... 🔊",
        'status_tuning': "`[AI调音师]`:好嘞!已经按您的要求调整好了!\n\n`[系统]`:正在生成音频... 🎶",
        # --- Change 1: updated completion message (points users to the download icon) ---
        'status_done': "`[系统]`:搞定!您的AI定制版音频已生成!🎉\n(点击播放器右侧的 **⋮** 即可下载)",
    },
    'en': {
        'title': "FeiMatrix AI Dynamic Equalizer", 'subtitle': "Upload a song, tell me the vibe you want, and I'll handle the rest!", 'lang_label': "Language / 语言",
        'step1_header': "Step 1: Upload Audio", 'upload_label': "Click or drag your MP3 file here",
        'step2_header': "Step 2: Tell Me Your Vibe", 'custom_input_label': "Describe the feeling you want in your own words (Recommended):",
        'custom_input_placeholder': "e.g., I want the guitar to be crisper and the drums more powerful...", 'quick_choice_label': "Or, quickly pick a preset style:",
        'quick_choices': [ "I want that 'thump-thump' heavy bass!", "Make the treble brighter and clearer", "Make the vocals stand out more", "I like a warm and rich sound", "Give me a live concert feeling", "Keep it natural, just add more detail", "None (use the input box above)", ],
        'default_choice': "None (use the input box above)", 'step3_header': "Step 3: Start Tuning", 'process_button': "Start AI Tuning!",
        'log_header': "AI Tuning Engineer's Log", 'log_initial': "`[System]`: Ready when you are, just upload a file~", 'result_header': "Step 4: Listen & Download Your Custom Version",
        'result_label': "Your AI-tuned audio will appear here", 'accordion_header': "(Advanced) View AI-Customized EQ Parameters", 'err_no_file': "Oops, you forgot to upload the MP3 file!",
        'info_no_pref': "You didn't specify a style, so I'll perform a gentle detail enhancement.", 'status_analyzing': "`[AI Analyst]`: Roger that! Analyzing your audio... ⏳",
        'status_analysis_failed': "AI analysis failed: {e}. Default tuning strategy will be used.", 'status_understanding': "`[AI Analyst]`: Analysis complete!\n> {analysis}\n\n`[AI Tuning Engineer]`: Understanding {choice} and adjusting EQ... 🔊",
        'status_tuning': "`[AI Tuning Engineer]`: Alright! Tuned to your request!\n\n`[System]`: Generating audio... 🎶",
        # --- Change 1: updated completion message (points users to the download icon) ---
        'status_done': "`[System]`: All set! Your AI custom audio is ready! 🎉\n(Click the **⋮** icon on the right of the player to download)",
    }
}
LANG_MAP = {"简体中文": "zh", "English": "en"}

def get_ai_tuned_eq_settings(audio_analysis_text, user_preference):
    """Combine Gemini's analysis text with keywords in the user's preference to build per-band EQ gains (dB)."""
    eq_values = [0.0] * len(EQ_BANDS_HZ)
    pref_lower = user_preference.lower()
    # Genre-based starting curve from the analysis text
    if "电子舞曲" in audio_analysis_text or "EDM" in audio_analysis_text:
        eq_values = [4.0, 2.5, -1.5, -0.5, 1.0, 2.0, 3.5, 4.0, 2.5, 1.0]
    elif "爵士乐" in audio_analysis_text or "warm" in audio_analysis_text:
        eq_values = [3.0, 1.5, -0.5, -1.0, 0.5, 1.5, 2.0, 3.0, 2.0, 0.5]
    elif "人声" in audio_analysis_text or "vocal" in audio_analysis_text:
        eq_values[4] += 0.5; eq_values[5] += 1.0
    # Keyword-based adjustments from the user's preference text
    if any(k in pref_lower for k in ["低音", "bass", "thump", "punch", "蹦迪"]):
        eq_values[0] += 3.0; eq_values[1] += 1.5
    if any(k in pref_lower for k in ["高音", "treble", "bright", "clear", "crisp"]):
        eq_values[6] += 1.5; eq_values[7] += 2.0; eq_values[8] += 1.0
    if any(k in pref_lower for k in ["人声", "vocal", "singing", "lyrics"]):
        eq_values[4] += 1.5; eq_values[5] += 1.0; eq_values[3] -= 0.5
    if any(k in pref_lower for k in ["温暖", "warm", "rich", "soft"]):
        eq_values[1] += 1.5; eq_values[2] += 1.0; eq_values[6] -= 0.5
    if any(k in pref_lower for k in ["空间", "space", "live", "concert"]):
        eq_values[6] += 1.0; eq_values[7] += 1.0; eq_values[4] -= 0.5
    if any(k in pref_lower for k in ["自然", "natural", "original"]):
        eq_values = [v * 0.5 for v in eq_values]; eq_values[7] += 0.5  # soften every band, add a touch of air
    # Clamp to the allowed range and label each band by its center frequency
    eq_values = [max(-EQ_RANGE_DB, min(EQ_RANGE_DB, v)) for v in eq_values]
    return {f'{f} Hz': v for f, v in zip(EQ_BANDS_HZ, eq_values)}

def apply_eq_to_audio(audio_path, eq_settings):
    """Render the EQ curve by passing an ffmpeg 'equalizer' filter chain through pydub's export."""
    try:
        audio = AudioSegment.from_file(audio_path)
    except Exception as e:
        print(f"Audio load error: {e}")
        return None
    q_factor = 1.414
    # One ffmpeg equalizer filter per band with a non-zero gain
    filter_parts = [
        f"equalizer=f={band.split(' ')[0]}:width_type=q:w={q_factor}:g={gain}"
        for band, gain in eq_settings.items() if gain != 0
    ]
    if not filter_parts:
        return audio_path  # nothing to change, hand back the original file
    output_path = f"{os.path.splitext(audio_path)[0]}_eq.mp3"
    try:
        audio.export(output_path, format="mp3", parameters=["-af", ",".join(filter_parts)])
        return output_path
    except Exception as e:
        print(f"EQ apply error: {e}")
        raise gr.Error("Failed to apply EQ! This might be an issue with ffmpeg on the server.")

def process_and_tune(audio_file, quick_choice, custom_input, lang_choice):
    """Generator handler: streams status updates, slider values, and finally the tuned audio to the UI."""
    lang_code = LANG_MAP[lang_choice]
    L = LANG[lang_code]
    if not audio_file:
        raise gr.Error(L['err_no_file'])
    # The free-text description wins over the quick preset; otherwise fall back to a gentle default
    if custom_input and custom_input.strip():
        final_preference = custom_input
    elif L['default_choice'] not in quick_choice:
        final_preference = quick_choice
    else:
        final_preference = L['quick_choices'][-2]
        gr.Info(L['info_no_pref'])
    # gr.File(type="filepath") passes a plain path string; older Gradio versions pass a file wrapper
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
    slider_updates = {s: gr.update(value=0) for s in eq_sliders}
    yield {status_log_md: gr.update(value=L['status_analyzing']), processed_audio_output: gr.update(value=None),
           eq_accordion: gr.update(visible=True, open=False), **slider_updates}
    try:
        prompt = "Briefly analyze this audio's genre, mood, and key instruments."
        response = gemini_model.generate_content([genai.upload_file(path=audio_path), prompt])
        audio_analysis_text = response.text or "(AI did not provide a detailed analysis)"
    except Exception as e:
        audio_analysis_text = L['status_analysis_failed'].format(e=e)
        gr.Warning(audio_analysis_text)
    choice_desc = f"“{final_preference}”"
    yield {status_log_md: gr.update(value=L['status_understanding'].format(analysis=audio_analysis_text, choice=choice_desc))}
    time.sleep(1)
    eq_settings = get_ai_tuned_eq_settings(audio_analysis_text, final_preference)
    slider_updates = {s: gr.update(value=v) for s, v in zip(eq_sliders, eq_settings.values())}
    yield {status_log_md: gr.update(value=L['status_tuning']), **slider_updates}
    time.sleep(1)
    eq_audio_path = apply_eq_to_audio(audio_path, eq_settings)
    if not eq_audio_path:
        raise gr.Error("Audio processing failed!")
    yield {status_log_md: gr.update(value=L['status_done']),
           processed_audio_output: gr.update(value=eq_audio_path, label=L['result_label'], autoplay=True),
           eq_accordion: gr.update(open=False)}

def update_language(lang_choice):
    L = LANG[LANG_MAP[lang_choice]]
    return {
        title_md: gr.update(value=f"# {L['title']}"), subtitle_md: gr.update(value=L['subtitle']), step1_header_md: gr.update(value=f"### **{L['step1_header']}**"),
        audio_input: gr.update(label=L['upload_label']), step2_header_md: gr.update(value=f"### **{L['step2_header']}**"), custom_input: gr.update(label=L['custom_input_label'], placeholder=L['custom_input_placeholder']),
        quick_choice: gr.update(label=L['quick_choice_label'], choices=L['quick_choices'], value=L['default_choice']), step3_header_md: gr.update(value=f"### **{L['step3_header']}**"),
        process_button: gr.update(value=L['process_button']), log_header_md: gr.update(value=f"### **{L['log_header']}**"), status_log_md: gr.update(value=L['log_initial']),
        result_header_md: gr.update(value=f"### **{L['result_header']}**"), processed_audio_output: gr.update(label=L['result_label']), eq_accordion: gr.update(label=L['accordion_header']),
    }

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    lang_switcher = gr.Radio(choices=["简体中文", "English"], value="简体中文", label="Language / 语言", info="选择界面语言 / Select UI Language")
    title_md = gr.Markdown("# FeiMatrix 智能动态调音")
    subtitle_md = gr.Markdown("上传一首歌,告诉我你想要的感觉,剩下的交给我!")
    with gr.Column():
        step1_header_md = gr.Markdown("### **第一步:上传音频**")
        audio_input = gr.File(label="点击或拖拽 MP3 文件到这里", type="filepath", file_types=[".mp3"])
        step2_header_md = gr.Markdown("### **第二步:告诉我你的感觉**")
        custom_input = gr.Textbox(label="用你自己的话描述想要的感觉(推荐):", placeholder="例如:我想要吉他声更清脆,鼓声更有力...", lines=2)
        quick_choice = gr.Radio(label="或者,快速选择一个预设风格:", choices=LANG['zh']['quick_choices'], value=LANG['zh']['default_choice'])
        step3_header_md = gr.Markdown("### **第三步:开始调音**")
        process_button = gr.Button("开始智能调音!", variant="primary")
        log_header_md = gr.Markdown("### **AI 调音师工作日志**")
        status_log_md = gr.Markdown("`[系统]`:我准备好啦,等你上传文件哦~")
        result_header_md = gr.Markdown("### **第四步:聆听并下载您的定制版**")
        # --- Change 2: added show_download_button=True so listeners can save the tuned file ---
        processed_audio_output = gr.Audio(label="这里会显示AI调音后的音频", type="filepath", interactive=True, show_download_button=True)
    with gr.Accordion("(高级)查看 AI 定制的 EQ 参数", open=False, visible=False) as eq_accordion:
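        # Read-only sliders that visualize the AI-chosen gain for each band (updated by process_and_tune)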
        eq_sliders = [gr.Slider(minimum=-EQ_RANGE_DB, maximum=EQ_RANGE_DB, value=0, step=0.5, label=f"{f} Hz", interactive=False) for f in EQ_BANDS_HZ]
    all_ui_outputs = [title_md, subtitle_md, step1_header_md, audio_input, step2_header_md, custom_input, quick_choice, step3_header_md, process_button, log_header_md, status_log_md, result_header_md, processed_audio_output, eq_accordion]
    lang_switcher.change(fn=update_language, inputs=lang_switcher, outputs=all_ui_outputs, queue=False)
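    # process_and_tune is a generator, so each yield is streamed to the UI as a progressive update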
    process_button.click(fn=process_and_tune, inputs=[audio_input, quick_choice, custom_input, lang_switcher], outputs=[status_log_md, processed_audio_output, eq_accordion, *eq_sliders])

if __name__ == "__main__":
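    # mcp_server=True additionally exposes the app's functions over MCP (requires a Gradio release with MCP support)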
    demo.launch(mcp_server=True)