import os import tempfile import gradio as gr import requests import json from loguru import logger from typing import Optional, Tuple import base64 import time def call_gradio_client_api(video_file, text_prompt, guidance_scale, inference_steps, sample_nums): """调用官方Hugging Face Space的API""" try: from gradio_client import Client logger.info("连接到官方 HunyuanVideo-Foley Space...") # 连接到官方Space client = Client("tencent/HunyuanVideo-Foley") logger.info("发送推理请求...") # 调用推理函数 result = client.predict( video_file, # 视频文件 text_prompt, # 文本提示 guidance_scale, # CFG scale inference_steps, # 推理步数 sample_nums, # 样本数量 api_name="/infer_single_video" # API端点名称 ) return result, "✅ 成功通过官方API生成音频!" except Exception as e: error_msg = str(e) logger.error(f"Gradio Client API 调用失败: {error_msg}") if "not found" in error_msg.lower(): return None, "❌ 官方Space的API端点未找到,可能接口已更改" elif "connection" in error_msg.lower(): return None, "❌ 无法连接到官方Space,请检查网络" elif "queue" in error_msg.lower(): return None, "⏳ 官方Space繁忙,请稍后重试" else: return None, f"❌ API调用错误: {error_msg}" def call_huggingface_inference_api(video_file, text_prompt): """调用Hugging Face Inference API""" try: logger.info("尝试Hugging Face Inference API...") API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley" # 读取视频文件 with open(video_file, "rb") as f: video_data = f.read() # 准备请求数据 headers = { "Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}", } # 发送请求 response = requests.post( API_URL, headers=headers, json={"inputs": {"video": base64.b64encode(video_data).decode(), "text": text_prompt}}, timeout=300 ) if response.status_code == 200: # 保存结果 temp_dir = tempfile.mkdtemp() audio_path = os.path.join(temp_dir, "generated_audio.wav") with open(audio_path, 'wb') as f: f.write(response.content) return [audio_path], "✅ 通过Hugging Face API生成成功!" else: logger.error(f"HF API错误: {response.status_code}") return None, f"❌ Hugging Face API返回错误: {response.status_code}" except Exception as e: logger.error(f"HF API调用失败: {str(e)}") return None, f"❌ Hugging Face API调用失败: {str(e)}" def try_alternative_apis(video_file, text_prompt): """尝试其他可能的API服务""" # 1. 尝试通过公开的demo接口 try: logger.info("尝试demo接口...") # 这里可以尝试其他公开的API服务 # 比如Replicate、RunPod等 return None, "❌ 暂无可用的替代API服务" except Exception as e: return None, f"❌ 替代API调用失败: {str(e)}" def smart_api_inference(video_file, text_prompt, guidance_scale=4.5, inference_steps=50, sample_nums=1): """智能API推理 - 尝试多种API调用方式""" if video_file is None: return [], "❌ 请上传视频文件!" if not text_prompt: text_prompt = "audio for this video" logger.info(f"开始API推理: {video_file}") logger.info(f"文本提示: {text_prompt}") status_updates = [] # 方法1: 尝试Gradio Client (最可能成功) status_updates.append("🔄 尝试连接官方Space API...") try: result, status = call_gradio_client_api( video_file, text_prompt, guidance_scale, inference_steps, sample_nums ) if result: return result, "\n".join(status_updates + [status]) status_updates.append(status) except ImportError: status_updates.append("⚠️ gradio_client未安装,跳过官方API调用") # 方法2: 尝试Hugging Face Inference API status_updates.append("🔄 尝试Hugging Face Inference API...") result, status = call_huggingface_inference_api(video_file, text_prompt) if result: return result, "\n".join(status_updates + [status]) status_updates.append(status) # 方法3: 尝试其他API status_updates.append("🔄 尝试替代API服务...") result, status = try_alternative_apis(video_file, text_prompt) status_updates.append(status) # 所有方法都失败了 final_message = "\n".join(status_updates + [ "", "💡 **解决方案建议:**", "• 安装 gradio_client: pip install gradio_client", "• 配置 HF_TOKEN 环境变量", "• 等待官方Space负载降低", "• 本地运行完整模型(需24GB+ RAM)", "", "🔗 **官方Space**: https://huggingface.co/spaces/tencent/HunyuanVideo-Foley" ]) return [], final_message def create_real_api_interface(): """创建真实API调用界面""" css = """ .api-status { background: #f0f8ff; border: 2px solid #4169e1; border-radius: 10px; padding: 1rem; margin: 1rem 0; color: #191970; } """ with gr.Blocks(css=css, title="HunyuanVideo-Foley API Client") as app: # Header gr.HTML("""
API客户端 - 调用真实模型推理