"""Gradio demo that runs an uploaded recording through a Soft-VC pipeline on CPU.

The pipeline is: ``hubert.units`` -> ``acoustic.generate`` -> ``hifigan``,
with all three models fetched through ``torch.hub`` at import time.
"""
import os
import tempfile

import torch
import torchaudio
from pydub import AudioSegment
import gradio as gr

# Rate (Hz) the input is resampled to before inference and the rate the
# converted audio is written at.
SAMPLE_RATE = 16000

_original_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

    The override is unconditional on purpose: callers that pass
    ``map_location=None`` explicitly must still end up on the CPU
    (presumably the hub checkpoints were saved from a GPU -- TODO confirm).

    Args:
        *args: Positional arguments forwarded to ``torch.load``.
        **kwargs: Keyword arguments forwarded to ``torch.load``; any
            ``map_location`` entry is replaced.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu = torch.device('cpu')
    if len(args) >= 2:
        # map_location is torch.load's second positional parameter; replace
        # it there instead of also passing it by keyword, which would raise
        # "got multiple values for argument 'map_location'".
        args = (args[0], cpu, *args[2:])
        kwargs.pop("map_location", None)
    else:
        kwargs["map_location"] = cpu
    return _original_load(*args, **kwargs)


# Patch globally so the checkpoint loads performed inside torch.hub go
# through cpu_load as well.
torch.load = cpu_load

hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True).cpu()
acoustic = torch.hub.load("bshall/acoustic-model:main", "hubert_soft", trust_repo=True).cpu()
hifigan = torch.hub.load("bshall/hifigan:main", "hifigan_hubert_soft", trust_repo=True).cpu()


def soft_vc(audio_path):
    """Convert the recording at *audio_path* and return the path of the result.

    The input is decoded with pydub, resampled to ``SAMPLE_RATE`` mono and
    written to a private temporary WAV file, so the caller's file is never
    modified. The result is saved to a fresh temporary ``.wav`` file per
    call, so concurrent requests cannot overwrite each other's output.

    Args:
        audio_path: Path of the input audio file, as supplied by the Gradio
            ``Audio`` input component (``type="filepath"``).

    Returns:
        Path of a newly created WAV file holding the converted audio. The
        file is not deleted by this function.

    Raises:
        ValueError: If *audio_path* is empty or ``None`` (nothing uploaded).
    """
    if not audio_path:
        raise ValueError("No input audio was provided.")

    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "input.wav")
        exported = (
            AudioSegment.from_file(audio_path)
            .set_frame_rate(SAMPLE_RATE)
            .set_channels(1)
            .export(wav_path, format="wav")
        )
        # export() hands back the open output file; close it so the handle
        # is not leaked and the directory can be removed on every platform.
        exported.close()
        # torchaudio.load returns (waveform, sample_rate); add a leading
        # batch dimension to the waveform before feeding the models.
        source = torchaudio.load(wav_path)[0].unsqueeze(0).cpu()

    with torch.inference_mode():
        target = hifigan(acoustic.generate(hubert.units(source)).transpose(1, 2))

    # Reserve a unique output name; delete=False because the caller (Gradio)
    # reads the file after this function has returned.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        output_path = out_file.name
    torchaudio.save(output_path, target.squeeze(0).cpu(), SAMPLE_RATE)
    return output_path


# The interface is still launched at import time, as the original script did.
demo = gr.Interface(
    soft_vc,
    gr.Audio(label="Input Audio", type="filepath"),
    gr.Audio(label="Output Audio", type="filepath"),
)
demo.launch()