Commit cf9bca8
Parent(s): 73e6a9f

Added some experimental preloading of the RMVPE and VC models to hopefully ease inference time. May break stuff.

Files changed:
- app.py (+19 -8)
- vc_infer_pipeline.py (+17 -9)
app.py CHANGED

@@ -33,15 +33,26 @@ limitation = os.getenv("SYSTEM") == "spaces"
 #limitation=True
 
 audio_mode = []
-f0method_mode = [
-f0method_info = "PM is fast but low quality, crepe and harvest are slow but good quality, RMVPE is the best of both worlds. (Default: RMVPE))"
+f0method_mode = []
 if limitation is True:
+    f0method_info = "PM is better for testing, RMVPE is better for finalized generations. (Default: RMVPE)"
     audio_mode = ["TTS Audio", "Upload audio"]
+    f0method_mode = ["pm", "rmvpe"]
 else:
+    f0method_info = "PM is fast but low quality, crepe and harvest are slow but good quality, RMVPE is the best of both worlds. (Default: RMVPE)"
     audio_mode = ["TTS Audio", "Youtube", "Upload audio"]
+    f0method_mode = ["pm", "crepe", "harvest", "rmvpe"]
 
-if os.path.isfile("rmvpe.pt"):
-    f0method_mode.append("rmvpe")
+#if os.path.isfile("rmvpe.pt"):
+#    f0method_mode.append("rmvpe")
+
+
+#Eagerload VCs
+print("Preloading VCs...")
+vcArr=[]
+vcArr.append(VC(32000, config))
+vcArr.append(VC(40000, config))
+vcArr.append(VC(48000, config))
 
 def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect):
     try:
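Note on the hunk above: the new code constructs one VC instance per supported target sample rate at startup, so infer() no longer pays that construction cost on every request. A minimal sketch of the same idea keyed by sample rate rather than by list position (VC and config are the objects app.py already has in scope; SUPPORTED_SRS and vc_by_sr are hypothetical names, not part of this commit):

# Hypothetical alternative to vcArr: preload one VC per supported
# target sample rate, keyed by the rate itself.
SUPPORTED_SRS = (32000, 40000, 48000)
vc_by_sr = {sr: VC(sr, config) for sr in SUPPORTED_SRS}

# At inference time a plain dict lookup replaces index arithmetic, and
# an unsupported rate raises KeyError instead of indexing out of range:
# audio_opt = vc_by_sr[tgt_sr].pipeline(...)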
@@ -99,10 +110,10 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
             net_g = net_g.half()
         else:
             net_g = net_g.float()
-        vc = VC(tgt_sr, config)
+        vcIdx = int((tgt_sr/8000)-4)
 
         #Gen audio
-        audio_opt = vc.pipeline(
+        audio_opt = vcArr[vcIdx].pipeline(
             hubert_model,
             net_g,
             0,
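The index formula in this hunk maps the three preloaded sample rates onto vcArr positions: 32000/8000 - 4 = 0, 40000/8000 - 4 = 1, and 48000/8000 - 4 = 2. A standalone self-check of that arithmetic:

# vcIdx = int((tgt_sr/8000)-4) maps the supported rates to 0..2.
for tgt_sr, expected in [(32000, 0), (40000, 1), (48000, 2)]:
    assert int((tgt_sr / 8000) - 4) == expected
# Any other rate silently truncates (e.g. 36000 -> int(0.5) -> 0), so
# the formula is only safe while tgt_sr is guaranteed to be one of the
# three preloaded rates.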
@@ -125,7 +136,7 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
         )
         info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
         print(f"Successful inference with model {name} | {tts_text} | {info}")
-        del net_g,
+        del net_g, cpt
         return info, (tgt_sr, audio_opt)
     except:
         info = traceback.format_exc()
@@ -516,7 +527,7 @@ if __name__ == '__main__':
             "#### <center>Original devs:\n"
             "<center>the RVC Project, lj1995, zomehwh \n\n"
             "#### <center>Model creators:\n"
-            "<center>dacoolkid44, Hijack, Maki Ligon, megaaziib, KitLemonfoot, yeey5, Sui, MahdeenSky, Itaxhix, Acato, Kyuubical,
+            "<center>dacoolkid44, Hijack, Maki Ligon, megaaziib, KitLemonfoot, yeey5, Sui, MahdeenSky, Itaxhix, Acato, Kyuubical, Listra92, IshimaIshimsky, ZomballTH, Jotape91, RigidSpinner, RandomAssBettel, Mimizukari, Oida, Shu-Kun, Nhat Minh, Ardha27, Legitdark, TempoHawk, 0x3e9, Kaiaya, Skeetawn, Sonphantrung, Pianissimo, Gloomwastragic, Sunesu, Aimbo, Act8113, Blyxeen\n"
         )
     if limitation is True:
         app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
vc_infer_pipeline.py CHANGED

@@ -13,6 +13,14 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
 
 input_audio_path2wav = {}
 
+#Attempting a eagerload of the RMVPE model here.
+from config import Config
+config = Config()
+from rmvpe import RMVPE
+print("Preloading RMVPE model")
+model_rmvpe = RMVPE("rmvpe.pt", is_half=config.is_half, device=config.device)
+del config
+
 
 @lru_cache
 def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
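Because Python runs module-level statements only on the first import and caches the module afterwards, the block added above loads the RMVPE weights once per process, and every VC instance shares the same model_rmvpe. The same behaviour written as an explicit memoized loader, should lazy startup ever be preferred again (get_rmvpe is a hypothetical helper, not part of this commit):

_model_rmvpe = None

def get_rmvpe(is_half, device):
    # Load rmvpe.pt on first use, then reuse the cached instance.
    global _model_rmvpe
    if _model_rmvpe is None:
        from rmvpe import RMVPE  # same module the diff imports
        _model_rmvpe = RMVPE("rmvpe.pt", is_half=is_half, device=device)
    return _model_rmvpe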
@@ -128,14 +136,14 @@ class VC(object):
             f0[pd < 0.1] = 0
             f0 = f0[0].cpu().numpy()
         elif f0_method == "rmvpe":
-            if hasattr(self, "model_rmvpe") == False:
-                from rmvpe import RMVPE
-
-                print("loading rmvpe model")
-                self.model_rmvpe = RMVPE(
-                    "rmvpe.pt", is_half=self.is_half, device=self.device
-                )
-            f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
+            ## if hasattr(self, "model_rmvpe") == False:
+            ##     from rmvpe import RMVPE
+            ##
+            ##     print("loading rmvpe model")
+            ##     self.model_rmvpe = RMVPE(
+            ##         "rmvpe.pt", is_half=self.is_half, device=self.device
+            ##     )
+            f0 = model_rmvpe.infer_from_audio(x, thred=0.03)
             f0 *= pow(2, f0_up_key / 12)
             # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
             tf0 = self.sr // self.window  # f0 points per second
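The line f0 *= pow(2, f0_up_key / 12) kept by this hunk is the usual equal-temperament transposition: each semitone scales frequency by 2^(1/12), so twelve semitones double it. A quick worked check:

# Shifting A4 (440 Hz) up one octave (12 semitones) should give 880 Hz.
f0_up_key = 12
factor = pow(2, f0_up_key / 12)   # 2.0
print(440.0 * factor)             # 880.0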
@@ -440,4 +448,4 @@ class VC(object):
         del pitch, pitchf, sid
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-        return audio_opt
+        return audio_opt
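On the cleanup pattern this hunk leaves in place: torch.cuda.empty_cache() only returns allocator blocks whose tensors are no longer referenced, which is why the del comes first. A minimal illustration (assumes a CUDA device is present; the tensor name t is arbitrary):

import torch

if torch.cuda.is_available():
    t = torch.zeros(1 << 20, device="cuda")  # hold ~4 MB on the GPU
    del t                         # drop the last Python reference
    torch.cuda.empty_cache()      # now the cached block can be released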