# DeepAudio-V1 / eval/generate_v2a.py
# Repository page header: uploaded by lshzhm, commit 99bbd30 ("init commit", verified).
import os
import torch
import torchaudio
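# Earlier variant of normalize_wav, kept for reference (DC-offset removal,
# peak normalization on the first channel, then scaling to 0.5):
# def normalize_wav(waveform):
#     waveform = waveform - torch.mean(waveform)
#     waveform = waveform / (torch.max(torch.abs(waveform[0, :])) + 1e-8)
#     return waveform * 0.5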
def normalize_wav(waveform, waveform_ref):
    """Rescale ``waveform`` so its peak magnitude matches that of ``waveform_ref``.

    Args:
        waveform: Tensor to rescale.
        waveform_ref: Tensor whose largest absolute value is the target peak.

    Returns:
        ``waveform / max(|waveform|) * max(|waveform_ref|)``. An empty or
        all-zero ``waveform`` is returned unchanged, because it has no peak
        to scale from.
    """
    # torch.max raises on a tensor with no elements; nothing to rescale anyway.
    if waveform.numel() == 0:
        return waveform
    peak = torch.max(torch.abs(waveform))
    # A silent signal would otherwise be divided by zero and turn into NaNs.
    if peak == 0:
        return waveform
    return waveform / peak * torch.max(torch.abs(waveform_ref))
# Load the evaluation list, one entry per line (line endings are kept).
with open("/ailab-train/speech/zhanghaomin/codes3/F5-TTS-main/data/v2c_test.lst") as fr:
    lines = list(fr)
# Directory holding the generated video-to-audio files.
v2a_path = "/ailab-train/speech/zhanghaomin/codes3/MMAudio-main/output_v2c_neg/"
# Root directory for everything this script writes.
output_dir = "outputs_v2a/"
# exist_ok=True replaces the check-then-create pattern (which can race with a
# concurrent run) and also creates output_dir itself as the parent directory.
for subdir in ("ref", "gen", "tgt"):
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)
def _load_resampled(path, target_sr=24000):
    """Load an audio file and resample it to ``target_sr`` when its rate differs."""
    waveform, sr = torchaudio.load(path)
    if sr != target_sr:
        waveform = torchaudio.functional.resample(waveform, orig_freq=sr, new_freq=target_sr)
    return waveform


# The un-normalized output folders do not depend on the item being processed,
# so create them once up front instead of re-checking on every iteration.
for subdir in ("ref_nonorm", "gen_nonorm", "tgt_nonorm"):
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

for idx, line in enumerate(lines):
    entry = line.strip()
    # A blank line (e.g. a trailing newline in the list) cannot be unpacked
    # into six fields; skip it. idx stays the line index within the list.
    if not entry:
        continue
    # Fields: prompt wav, prompt video, prompt text, target wav, target video,
    # target text. The prompt video and both text fields are not used here.
    wav_p, video_p, txt_p, wav, video, txt = entry.split("\t")
    # Generated audio is stored flat, with "/" in the video path replaced by "__".
    v2a_audio = v2a_path + video.replace("/", "__") + ".flac"

    # Everything is brought to 24 kHz and only the first channel is kept.
    waveform = _load_resampled(wav)[0:1, :]
    waveform_p = _load_resampled(wav_p)[0:1, :]
    waveform_v2a = _load_resampled(v2a_audio)[0:1, :]

    name = str(idx).zfill(8) + ".wav"
    torchaudio.save(os.path.join(output_dir, "ref", name), waveform_p, 24000)
    # The generated audio is peak-matched to the prompt (reference) audio.
    torchaudio.save(os.path.join(output_dir, "gen", name), normalize_wav(waveform_v2a, waveform_p), 24000)
    torchaudio.save(os.path.join(output_dir, "tgt", name), waveform, 24000)

    # Same three signals again, with the generated audio left at its own level.
    torchaudio.save(os.path.join(output_dir, "ref_nonorm", name), waveform_p, 24000)
    torchaudio.save(os.path.join(output_dir, "gen_nonorm", name), waveform_v2a, 24000)
    torchaudio.save(os.path.join(output_dir, "tgt_nonorm", name), waveform, 24000)