# Spaces: Running
import os | |
try: | |
import torchaudio | |
except ImportError: | |
os.system("cd ./F5-TTS; pip install -e .") | |
import spaces | |
import logging | |
from datetime import datetime | |
from pathlib import Path | |
import gradio as gr | |
import torch | |
import torchaudio | |
import tempfile | |
import requests | |
import shutil | |
import numpy as np | |
from huggingface_hub import hf_hub_download | |
# Directory into which the v2c checkpoint is downloaded; the speech-synthesis
# command later in this file refers to ./F5-TTS/ckpts/v2c/v2c_s44.pt.
model_path = "./F5-TTS/ckpts/v2c/"

# exist_ok=True replaces the racy "check, then create" sequence: it is a no-op
# when the directory already exists and cannot fail if it appears in between.
os.makedirs(model_path, exist_ok=True)

# Always request the checkpoint (not only when the directory was missing) so
# that file_path is bound on every start and a directory that exists without
# the checkpoint is repaired.
file_path = hf_hub_download(repo_id="lshzhm/DeepAudio-V1", filename="v2c_s44.pt", local_dir=model_path)
print(f"Model saved at: {file_path}")

# Root logger handle for this module.
log = logging.getLogger()
def _new_temp_path(suffix):
    """Create an empty temporary file ending in *suffix* and return its path."""
    # The handle is closed on leaving the ``with`` block; delete=False keeps
    # the file on disk so other code and child processes can use the path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        return tmp.name


def _fetch_input(source, dest):
    """Copy the media at *source* (an http(s) URL or a local path) to *dest*."""
    if source.startswith("http"):
        response = requests.get(source, timeout=60)
        # Fail loudly rather than saving an HTTP error page as media.
        response.raise_for_status()
        with open(dest, "wb") as fw:
            fw.write(response.content)
    else:
        shutil.copy(source, dest)


def _save_numpy_audio(sample_rate, samples, dest):
    """Write a ``(sample_rate, ndarray)`` clip to *dest* as mono float32 audio."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Scale by the dtype's full-scale value; for int16 this is the
        # 32768.0 divisor that was previously hard-coded.
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / full_scale
    else:
        # NOTE(review): floating-point input is assumed to already be in
        # [-1, 1] and is therefore not rescaled - confirm against callers.
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Assumes a (samples, channels) layout, as Gradio's numpy audio is
        # understood to use - TODO confirm. Flattening such an array directly
        # would interleave the channels into one garbled mono stream, so the
        # channels are averaged instead.
        samples = samples.mean(axis=1)
    torchaudio.save(dest, torch.from_numpy(samples.reshape(1, -1)), sample_rate)


#@spaces.GPU(duration=120)
def video_to_audio_and_speech(video: gr.Video, prompt: str, v2a_num_steps: int, text: str, audio_prompt: gr.Audio, text_prompt: str, v2s_num_steps: int):
    """Run video-to-audio generation, then video-to-speech synthesis.

    The input video and the speech prompt are first materialised as temporary
    files. ``./MMAudio/demo.py`` is then run on the video, followed by
    ``./F5-TTS/src/f5_tts/infer/infer_cli_test.py``. Both run as child
    processes with argument lists (no shell), so user-supplied text cannot be
    interpreted as shell syntax and multi-word prompts stay a single argument.

    Args:
        video: Local path or http(s) URL of the input video.
        prompt: Optional text prompt for video-to-audio; omitted when empty.
        v2a_num_steps: Value passed as ``--num_steps`` to the video-to-audio
            script.
        text: Transcription passed as ``--txt`` to the speech script.
        audio_prompt: Speech prompt, either a ``(sample_rate, ndarray)``
            tuple, a local path, or an http(s) URL.
        text_prompt: Transcription of the speech prompt (``--txt_p``).
        v2s_num_steps: Value passed as ``--nfe_step`` to the speech script.

    Returns:
        A ``(video_save_path, video_gen)`` tuple of output video paths. Both
        files are expected to be produced by the child scripts; this function
        does not write them itself.

    Raises:
        gr.Error: If the video or the speech prompt is missing.
        requests.HTTPError: If downloading a URL input fails.
        subprocess.CalledProcessError: If either child script exits non-zero.
    """
    # Function-scope import: the file's top-level import block is unchanged.
    import subprocess

    if not video:
        raise gr.Error("Please provide an input video.")
    if not isinstance(audio_prompt, tuple) and not audio_prompt:
        raise gr.Error("Please provide a speech prompt audio.")

    video_path = _new_temp_path(".mp4")
    audio_p_path = _new_temp_path(".wav")
    output_dir = os.path.dirname(video_path)

    # Derived output name: the temp path with "/" replaced by "__", plus
    # ".mp4". Presumably this mirrors the file name the video-to-audio script
    # writes into output_dir, so the expression is kept exactly as it was.
    video_save_path = str(output_dir) + "/" + str(video_path).replace("/", "__").strip(".") + ".mp4"
    print("paths", video, video_path, output_dir, video_save_path)

    _fetch_input(video, video_path)

    if isinstance(audio_prompt, tuple):
        sr, data = audio_prompt
        # Array diagnostics are only valid for the tuple form; printing them
        # unconditionally crashed for path/URL inputs.
        print("paths", audio_prompt, audio_p_path, data.shape, data.max(), data.min(), type(data))
        _save_numpy_audio(sr, data, audio_p_path)
    else:
        print("paths", audio_prompt, audio_p_path)
        _fetch_input(audio_prompt, audio_p_path)

    # --- Stage 1: video-to-audio -------------------------------------------
    v2a_command = [
        "python", "./demo.py",
        "--variant", "small_44k",
        "--output", output_dir,
        "--video", video_path,
    ]
    if prompt:
        # "--opt=value" keeps free text that starts with "-" from being read
        # as another option (assumes an argparse-style CLI - TODO confirm).
        v2a_command.append(f"--prompt={prompt}")
    v2a_command += ["--calc_energy", "1", "--num_steps", str(int(v2a_num_steps))]
    print("v2a command", v2a_command)
    # cwd replaces the former "cd ./MMAudio;" shell prefix; check=True turns
    # a failed run into an exception instead of silently continuing.
    subprocess.run(v2a_command, cwd="./MMAudio", check=True)

    # --- Stage 2: video-to-speech ------------------------------------------
    video_gen = video_save_path[:-4] + ".mp4.gen.mp4"
    v2s_command = [
        "python", "./F5-TTS/src/f5_tts/infer/infer_cli_test.py",
        "--output_dir", output_dir,
        "--start", "0",
        "--end", "1",
        "--ckpt_file", "./F5-TTS/ckpts/v2c/v2c_s44.pt",
        "--v2a_path", output_dir,
        "--wav_p", audio_p_path,
        f"--txt_p={text_prompt}",
        "--video", video_save_path,
        "--v2a_wav", video_save_path[:-4] + ".flac",
        f"--txt={text}",
        "--nfe_step", str(int(v2s_num_steps)),
    ]
    print("v2s command", v2s_command, video_gen)
    subprocess.run(v2s_command, check=True)

    return video_save_path, video_gen
# Gradio interface for the combined pipeline. The order of `inputs` matches
# the positional parameters of video_to_audio_and_speech, and each row of
# `examples` supplies one value per input in that same order.
video_to_audio_and_speech_tab = gr.Interface(
    title="Video-to-Audio-and-Speech",
    description="""
Project page: <a href="https://acappemin.github.io/DeepAudio-V1.github.io">https://acappemin.github.io/DeepAudio-V1.github.io</a><br>
Code: <a href="https://github.com/acappemin/DeepAudio-V1">https://github.com/acappemin/DeepAudio-V1</a><br>
""",
    fn=video_to_audio_and_speech,
    inputs=[
        gr.Video(label="Input Video"),
        gr.Text(label="Video-to-Audio Text Prompt"),
        gr.Number(label="Video-to-Audio Num Steps", value=25, precision=0, minimum=1),
        gr.Text(label="Video-to-Speech Transcription"),
        gr.Audio(label="Video-to-Speech Speech Prompt"),
        gr.Text(label="Video-to-Speech Speech Prompt Transcription"),
        gr.Number(label="Video-to-Speech Num Steps", value=32, precision=0, minimum=1),
    ],
    outputs=[
        gr.Video(label="Video-to-Audio Output"),
        gr.Video(label="Video-to-Speech Output"),
    ],
    # Each example pairs a video with the speech prompt (and its transcript)
    # taken from the other test clip.
    examples=[
        [
            "./tests/0235.mp4",
            "",
            25,
            "Who finally decided to show up for work Yay",
            "./tests/Gobber-00-0778.wav",
            "I've still got a few knocking around in here",
            32,
        ],
        [
            "./tests/0778.mp4",
            "",
            25,
            "I've still got a few knocking around in here",
            "./tests/Gobber-00-0235.wav",
            "Who finally decided to show up for work Yay",
            32,
        ],
    ],
    cache_examples=False,
)
if __name__ == "__main__":
    # Wrap the single tab in a tabbed layout, cap the request queue at one
    # pending job, and start the server.
    demo = gr.TabbedInterface(
        [video_to_audio_and_speech_tab],
        ['Video-to-Audio-and-Speech'],
    )
    queued_demo = demo.queue(max_size=1)
    queued_demo.launch()