hunyuanvideo-foley / test_audio.py
wzy013's picture
实现超级兼容的音频生成解决方案 - 彻底解决后端错误
55d09cb
raw
history blame
3.54 kB
"""
测试音频生成和 Gradio 兼容性
"""
import gradio as gr
import numpy as np
import wave
import tempfile
import os
from loguru import logger
def create_test_audio(
    text_prompt: str = "test audio",
    *,
    sample_rate: int = 44100,
    duration: float = 3.0,
) -> str:
    """Synthesize a short mono test clip and save it as a 16-bit PCM WAV file.

    The waveform is chosen by a case-insensitive substring match on
    ``text_prompt``:

    * contains ``"footsteps"``: a 2 Hz sine shaped by an exponential decay
      that restarts every 0.5 s;
    * contains ``"rain"``: Gaussian white noise (non-deterministic);
    * anything else (including an empty or ``None`` prompt): a 440 Hz sine.

    A linear fade-in and fade-out of up to 0.1 s each is applied, the signal
    is clipped to [-1, 1] and quantized to int16.

    Args:
        text_prompt: Free-form description used only to pick the waveform.
        sample_rate: Output sample rate in Hz; must be positive.
        duration: Clip length in seconds; must not be negative.

    Returns:
        Path of the written file, ``test_audio.wav`` inside a freshly created
        temporary directory. This function never deletes that directory;
        cleanup is left to the caller.

    Raises:
        ValueError: If ``sample_rate`` is not positive or ``duration`` is
            negative.
    """
    if sample_rate <= 0:
        raise ValueError(f"sample_rate must be positive, got {sample_rate}")
    if duration < 0:
        raise ValueError(f"duration must be non-negative, got {duration}")

    num_samples = int(duration * sample_rate)
    # Sample k sits at k / sample_rate. A linspace that includes the endpoint
    # would space samples duration / (n - 1) apart and detune every tone.
    t = np.arange(num_samples, dtype=np.float64) / sample_rate

    # Pick the waveform from the prompt; a missing prompt falls through to
    # the default tone instead of failing on .lower().
    prompt = (text_prompt or "").lower()
    if "footsteps" in prompt:
        audio = 0.4 * np.sin(2 * np.pi * 2 * t) * np.exp(-3 * (t % 0.5))
    elif "rain" in prompt:
        audio = 0.3 * np.random.randn(num_samples)
    else:
        audio = 0.3 * np.sin(2 * np.pi * 440 * t)

    # Linear fade-in/out. Each ramp is capped at half the clip so the two
    # slices never exceed the signal length on very short clips.
    fade_samples = min(int(0.1 * sample_rate), num_samples // 2)
    if fade_samples > 0:
        envelope = np.ones_like(audio)
        envelope[:fade_samples] = np.linspace(0, 1, fade_samples)
        envelope[-fade_samples:] = np.linspace(1, 0, fade_samples)
        audio = audio * envelope

    # Clip, then quantize to native-endian int16; the wave module takes care
    # of the on-disk byte order itself.
    audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    temp_dir = tempfile.mkdtemp()
    audio_path = os.path.join(temp_dir, "test_audio.wav")
    with wave.open(audio_path, 'wb') as wav_file:
        wav_file.setnchannels(1)   # mono
        wav_file.setsampwidth(2)   # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_int16.tobytes())

    logger.info(f"✅ 测试音频已保存: {audio_path}")
    return audio_path
def test_interface(text_input):
    """Generate a test clip for ``text_input`` and report the outcome.

    Returns a ``(audio_path, status_message)`` pair. On success the first
    element is the path returned by ``create_test_audio``; if the file is
    missing or any exception is raised, the first element is ``None`` and the
    message describes the failure.
    """
    try:
        wav_path = create_test_audio(text_input)

        # Guard: the generator returned a path but nothing exists there.
        if not os.path.exists(wav_path):
            return None, "❌ 音频文件未生成"

        size_bytes = os.path.getsize(wav_path)
        report = f"✅ 成功生成音频!\n文件路径: {wav_path}\n文件大小: {size_bytes} bytes\n文本: {text_input}"
        return wav_path, report
    except Exception as exc:
        # Handler boundary: log the error and return it as status text
        # rather than letting the exception propagate.
        logger.error(f"错误: {exc}")
        return None, f"❌ 生成失败: {str(exc)}"
# Build the Gradio interface
def create_test_app():
    """Assemble and return the Gradio Blocks app for the audio test.

    Layout: a title, then one row with two columns. The left column holds the
    prompt textbox and the generate button; the right column holds the audio
    player and a read-only status box. Clicking the button calls
    ``test_interface`` with the prompt and routes its two return values to
    the audio player and the status box.
    """
    with gr.Blocks(title="Audio Test") as demo:
        gr.HTML("<h1>🎵 音频兼容性测试</h1>")

        with gr.Row():
            # Left column: input controls.
            with gr.Column():
                prompt_box = gr.Textbox(
                    label="文本输入",
                    value="footsteps on ground",
                    placeholder="输入文本描述...",
                )
                run_button = gr.Button("生成测试音频", variant="primary")

            # Right column: results.
            with gr.Column():
                audio_player = gr.Audio(label="生成的音频")
                status_box = gr.Textbox(
                    label="状态信息",
                    lines=5,
                    interactive=False,
                )

        # Wire the button to the handler.
        run_button.click(
            fn=test_interface,
            inputs=[prompt_box],
            outputs=[audio_player, status_box],
        )

    return demo
if __name__ == "__main__":
    # Logging setup: clear the logger's handlers, then add a single sink that
    # prints each message as-is at INFO level and above.
    logger.remove()
    logger.add(lambda msg: print(msg, end=''), level="INFO")

    logger.info("启动音频测试应用...")

    # Launch settings: listen on all interfaces, port 7861, no public share
    # link, debug mode on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": False,
        "debug": True,
    }

    app = create_test_app()
    app.launch(**launch_options)