import os
import tempfile

import gradio as gr
import torch
import torchaudio
from pydub import AudioSegment

_original_load = torch.load
def cpu_load(*args, **kwargs):
    kwargs["map_location"] = torch.device('cpu')
    return _original_load(*args, **kwargs)
torch.load = cpu_load

# Fetch the three pretrained "hubert_soft" models through torch.hub and keep
# them on the CPU. ``trust_repo=True`` accepts the remote repositories without
# an interactive confirmation prompt.
hubert = torch.hub.load(
    "bshall/hubert:main",
    "hubert_soft",
    trust_repo=True,
).cpu()
acoustic = torch.hub.load(
    "bshall/acoustic-model:main",
    "hubert_soft",
    trust_repo=True,
).cpu()
hifigan = torch.hub.load(
    "bshall/hifigan:main",
    "hifigan_hubert_soft",
    trust_repo=True,
).cpu()

def soft_vc(audio_path):
    """Run the voice-conversion pipeline on an audio file.

    The input is decoded with pydub, converted to 16 kHz mono, and fed through
    ``hubert.units`` -> ``acoustic.generate`` -> ``hifigan``. The generated
    waveform is written to ``output.wav`` in the current working directory.

    Args:
        audio_path: Path of the input audio file. The file itself is left
            untouched.

    Returns:
        The string ``"output.wav"``, i.e. the path of the written result.

    Raises:
        ValueError: If ``audio_path`` is ``None`` or empty.
    """
    if not audio_path:
        raise ValueError("No input audio was provided.")

    sample_rate = 16000
    segment = (
        AudioSegment.from_file(audio_path)
        .set_frame_rate(sample_rate)
        .set_channels(1)
    )

    # Write the converted WAV to a scratch location instead of over the
    # caller's file: overwriting would destroy the original upload and leave
    # WAV data behind a possibly non-.wav extension.
    with tempfile.TemporaryDirectory() as scratch_dir:
        wav_path = os.path.join(scratch_dir, "input.wav")
        # export() hands back the open file object; close it so the data is
        # flushed and the directory can be removed afterwards.
        segment.export(wav_path, format="wav").close()
        waveform, _ = torchaudio.load(wav_path)

    # Add a leading batch dimension before handing the audio to the models.
    source = waveform.unsqueeze(0).cpu()
    with torch.inference_mode():
        units = hubert.units(source)
        generated = acoustic.generate(units).transpose(1, 2)
        target = hifigan(generated)

    # NOTE: the output path is fixed, so each call overwrites the previous
    # result.
    torchaudio.save("output.wav", target.squeeze(0).cpu(), sample_rate)
    return "output.wav"

# Build the web UI (one audio input, one audio output, both exchanged as file
# paths) around soft_vc and start serving it.
gr.Interface(
    fn=soft_vc,
    inputs=gr.Audio(label="Input Audio", type="filepath"),
    outputs=gr.Audio(label="Output Audio", type="filepath"),
).launch()