Spaces:
Running
Running
File size: 2,602 Bytes
99bbd30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import os
import torch
import torchaudio
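# Alternative normalization (disabled): remove the mean, scale by the peak of
# the first channel, then halve the amplitude.
# def normalize_wav(waveform):
#     waveform = waveform - torch.mean(waveform)
#     waveform = waveform / (torch.max(torch.abs(waveform[0, :])) + 1e-8)
#     return waveform * 0.5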
def normalize_wav(waveform, waveform_ref):
    """Rescale ``waveform`` so its peak magnitude matches that of ``waveform_ref``.

    Args:
        waveform: Tensor of audio samples to rescale.
        waveform_ref: Tensor whose largest absolute value is the target peak.

    Returns:
        ``waveform / max(|waveform|) * max(|waveform_ref|)``. If ``waveform``
        is empty or entirely zero it is returned unchanged, because there is
        no finite gain that could bring it to the reference level.
    """
    if waveform.numel() == 0:
        return waveform
    peak = torch.max(torch.abs(waveform))
    if peak == 0:
        # Dividing by a zero peak would fill the output with NaN.
        return waveform
    return waveform / peak * torch.max(torch.abs(waveform_ref))
# Read the test list; each entry is later split into tab-separated fields.
with open("/ailab-train/speech/zhanghaomin/codes3/F5-TTS-main/data/v2c_test.lst", "r") as fr:
    lines = fr.readlines()

# Root directory in which the generated .flac files are looked up.
v2a_path = "/ailab-train/speech/zhanghaomin/codes3/MMAudio-main/output_v2c_neg/"
output_dir = "outputs_v2a/"

# makedirs creates output_dir itself as a missing parent, and exist_ok=True
# avoids the race between checking for a directory and creating it.
for subdir in ("ref", "gen", "tgt"):
    os.makedirs(output_dir + "/" + subdir + "/", exist_ok=True)
TARGET_SR = 24000  # every clip is resampled to, and saved at, this rate


def _load_first_channel(path, target_sr=TARGET_SR):
    """Load ``path``, resample to ``target_sr`` if needed, and keep channel 0."""
    waveform, sr = torchaudio.load(path)
    if sr != target_sr:
        waveform = torchaudio.functional.resample(waveform, orig_freq=sr, new_freq=target_sr)
    return waveform[0:1, :]


# Loop-invariant: create the un-normalized output folders once, up front,
# instead of re-checking them for every sample.
for subdir in ("ref_nonorm", "gen_nonorm", "tgt_nonorm"):
    os.makedirs(output_dir + "/" + subdir + "/", exist_ok=True)

for idx, line in enumerate(lines):
    line = line.strip()
    if not line:
        # Skip blank lines (e.g. a trailing newline) instead of crashing on
        # the unpack below; idx still tracks the position in the list file.
        continue
    # Six tab-separated fields; only the two wav paths and the target video
    # path are needed here.
    wav_p, _video_p, _txt_p, wav, video, _txt = line.split("\t")

    # Generated audio is stored flat, with "/" in the video path replaced by "__".
    v2a_audio = v2a_path + video.replace("/", "__") + ".flac"

    tgt = _load_first_channel(wav)
    ref = _load_first_channel(wav_p)
    gen = _load_first_channel(v2a_audio)

    name = str(idx).zfill(8) + ".wav"

    # Level-matched set: the generated clip is rescaled to the reference peak.
    torchaudio.save(output_dir + "/ref/" + name, ref, TARGET_SR)
    torchaudio.save(output_dir + "/gen/" + name, normalize_wav(gen, ref), TARGET_SR)
    torchaudio.save(output_dir + "/tgt/" + name, tgt, TARGET_SR)

    # Raw set: the same three clips with the generated one left at its own level.
    torchaudio.save(output_dir + "/ref_nonorm/" + name, ref, TARGET_SR)
    torchaudio.save(output_dir + "/gen_nonorm/" + name, gen, TARGET_SR)
    torchaudio.save(output_dir + "/tgt_nonorm/" + name, tgt, TARGET_SR)
|