Spaces:
Running
Running
import os | |
import torch | |
import torchaudio | |
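# Earlier single-argument normalization, kept for reference only; it is
# superseded by the two-argument normalize_wav defined below. It removed the
# mean, scaled by the peak of the first channel, then halved the amplitude.
# def normalize_wav(waveform):
#     waveform = waveform - torch.mean(waveform)
#     waveform = waveform / (torch.max(torch.abs(waveform[0, :])) + 1e-8)
#     return waveform * 0.5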
def normalize_wav(waveform, waveform_ref):
    """Rescale ``waveform`` so its peak magnitude matches ``waveform_ref``.

    The peak is the maximum absolute value taken over the whole tensor.

    Args:
        waveform: Tensor to rescale.
        waveform_ref: Tensor whose peak magnitude is the target level.

    Returns:
        ``waveform / peak(waveform) * peak(waveform_ref)``. When ``waveform``
        is empty or entirely zero it is returned unchanged, because there is
        no peak to scale from.
    """
    # torch.max raises on an empty tensor, so bail out before calling it.
    if waveform.numel() == 0:
        return waveform
    peak = torch.max(torch.abs(waveform))
    # A silent clip has peak 0; dividing would turn every sample into NaN.
    if peak == 0:
        return waveform
    return waveform / peak * torch.max(torch.abs(waveform_ref))
# Script body: for every entry of the test list, load three audio files
# (the two wavs named in the list and the matching V2A output), bring them
# to 24 kHz, and write their first channel into numbered files under
# ref/, gen/, tgt/ and the *_nonorm/ counterparts.


def _load_at_24k(path):
    """Load an audio file and resample it to 24 kHz when it is not already."""
    audio, sr = torchaudio.load(path)
    if sr != 24000:
        audio = torchaudio.functional.resample(audio, orig_freq=sr, new_freq=24000)
    return audio


with open("/ailab-train/speech/zhanghaomin/codes3/F5-TTS-main/data/v2c_test.lst", "r") as fr:
    lines = fr.readlines()

v2a_path = "/ailab-train/speech/zhanghaomin/codes3/MMAudio-main/output_v2c_neg/"
output_dir = "outputs_v2a/"

# Create every output folder once, before the loop. exist_ok=True replaces
# the check-then-create pairs, which were race-prone and (for the *_nonorm
# folders) were re-evaluated for each list entry.
for subdir in ("ref", "gen", "tgt", "ref_nonorm", "gen_nonorm", "tgt_nonorm"):
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

for idx, line in enumerate(lines):
    # Six tab-separated fields are required; video_p, txt_p and txt are
    # parsed but not used by this script.
    wav_p, video_p, txt_p, wav, video, txt = line.strip().split("\t")

    # The V2A output is a .flac named after the video path with "/" -> "__".
    v2a_audio = v2a_path + video.replace("/", "__") + ".flac"

    waveform = _load_at_24k(wav)
    waveform_p = _load_at_24k(wav_p)
    waveform_v2a = _load_at_24k(v2a_audio)

    # Only the first channel of each signal is written out.
    ref = waveform_p[0:1, :]
    gen = waveform_v2a[0:1, :]
    tgt = waveform[0:1, :]

    name = str(idx).zfill(8) + ".wav"

    # gen/ holds the V2A audio with its peak matched to the ref clip.
    torchaudio.save(os.path.join(output_dir, "ref", name), ref, 24000)
    torchaudio.save(os.path.join(output_dir, "gen", name), normalize_wav(gen, ref), 24000)
    torchaudio.save(os.path.join(output_dir, "tgt", name), tgt, 24000)

    # *_nonorm/ holds the same signals with no level matching applied.
    torchaudio.save(os.path.join(output_dir, "ref_nonorm", name), ref, 24000)
    torchaudio.save(os.path.join(output_dir, "gen_nonorm", name), gen, 24000)
    torchaudio.save(os.path.join(output_dir, "tgt_nonorm", name), tgt, 24000)