Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,38 +1,44 @@
|
|
1 |
import os
|
2 |
import sys
|
3 |
import gradio as gr
|
4 |
-
import random
|
5 |
import numpy as np
|
6 |
import torch
|
7 |
import librosa
|
8 |
import torchaudio
|
9 |
from scipy.signal import resample
|
10 |
import time
|
11 |
-
import
|
12 |
|
13 |
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
14 |
sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
|
15 |
|
16 |
from cosyvoice.cli.cosyvoice import CosyVoice
|
17 |
from cosyvoice.utils.file_utils import load_wav
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
preset_speakers = {
|
21 |
"6歲": {
|
22 |
-
"
|
23 |
"transcription": "名字是微笑號,只是呢你們看,這一輛微笑號它這裡有寫八百型的喔,它是八百山出來"
|
24 |
},
|
25 |
"8歲": {
|
26 |
-
"
|
27 |
"transcription": "出來了出來了,你知道這個餐具是可以挖的,這個餐具可以用窩課魅一起挖嗎?"
|
28 |
}
|
29 |
}
|
30 |
|
31 |
def apply_preset(speaker_key):
|
32 |
if speaker_key in preset_speakers:
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
36 |
return None, ""
|
37 |
|
38 |
max_val = 0.8
|
@@ -97,12 +103,6 @@ def synthesize_speech(speaker_audio_path, content_text, speaker_text, speaker_ke
|
|
97 |
torchaudio.save(out_path, torch.tensor(audio_data).unsqueeze(0), sample_rate=target_sr)
|
98 |
return out_path
|
99 |
|
100 |
-
def download_audio_from_url(url, save_path="temp_prompt.wav"):
|
101 |
-
response = requests.get(url)
|
102 |
-
with open(save_path, "wb") as f:
|
103 |
-
f.write(response.content)
|
104 |
-
return save_path
|
105 |
-
|
106 |
with gr.Blocks() as demo:
|
107 |
gr.Markdown("""
|
108 |
# 小睿語音合成
|
@@ -147,4 +147,5 @@ with gr.Blocks() as demo:
|
|
147 |
為了加速,已關閉自動語音辨識,務必正確輸入語音樣本的文字轉寫。
|
148 |
""")
|
149 |
|
150 |
-
|
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
import gradio as gr
|
|
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
import librosa
|
7 |
import torchaudio
|
8 |
from scipy.signal import resample
|
9 |
import time
|
10 |
+
from huggingface_hub import snapshot_download, hf_hub_download
|
11 |
|
12 |
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
13 |
sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
|
14 |
|
15 |
from cosyvoice.cli.cosyvoice import CosyVoice
|
16 |
from cosyvoice.utils.file_utils import load_wav
|
17 |
+
|
18 |
+
def download_audio_from_dataset(filename, save_path="temp_prompt.wav"):
    """Fetch one sample audio file from the preset Hugging Face dataset repo.

    Args:
        filename: Name of the file inside the
            ``kanahomaisa/breezyvoice-samples`` dataset repository.
        save_path: Not used by this function. It is kept in the signature
            only so existing callers that pass it keep working; the file is
            NOT copied to this location.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file. Per the
        huggingface_hub documentation this is the local filesystem path of
        the downloaded (cached) file.
    """
    # Errors raised by hf_hub_download (missing file, no network, ...) are
    # deliberately not caught here; they propagate to the caller.
    cached_path = hf_hub_download(
        repo_id="kanahomaisa/breezyvoice-samples",
        filename=filename,
        repo_type="dataset",
    )
    return cached_path
|
24 |
|
25 |
# Built-in reference speakers offered as presets.
# Each key is the label used to look a preset up ("6歲" / "8歲" read as
# "6 years old" / "8 years old"). Each value holds:
#   "filename"      - the audio file name passed to download_audio_from_dataset
#   "transcription" - the text returned alongside that audio by apply_preset
#                     (presumably the verbatim transcript of the clip - confirm)
preset_speakers = {
    "6歲": {
        "filename": "smile_train.wav",
        "transcription": "名字是微笑號,只是呢你們看,這一輛微笑號它這裡有寫八百型的喔,它是八百山出來",
    },
    "8歲": {
        "filename": "utensils.wav",
        "transcription": "出來了出來了,你知道這個餐具是可以挖的,這個餐具可以用窩課魅一起挖嗎?",
    },
}
|
35 |
|
36 |
def apply_preset(speaker_key):
    """Resolve a preset speaker label to its audio file and transcription.

    Args:
        speaker_key: Key into ``preset_speakers`` (for example ``"6歲"``).

    Returns:
        A ``(local_path, transcription)`` tuple for a known preset, where
        ``local_path`` is the value returned by
        ``download_audio_from_dataset``. For an unknown key the result is
        ``(None, "")``.

    Any exception raised while downloading the sample is not caught here and
    propagates to the caller.
    """
    # Single lookup instead of a membership test followed by repeated indexing.
    preset = preset_speakers.get(speaker_key)
    if preset is None:
        return None, ""

    # Read both fields before downloading so a malformed preset entry fails
    # before any network access happens.
    filename = preset["filename"]
    transcription = preset["transcription"]
    local_path = download_audio_from_dataset(filename)
    return local_path, transcription
|
43 |
|
44 |
# NOTE(review): the code that reads max_val is outside this view; presumably
# it is the peak amplitude that generated audio is scaled to - TODO confirm.
max_val = 0.8
|
|
|
103 |
torchaudio.save(out_path, torch.tensor(audio_data).unsqueeze(0), sample_rate=target_sr)
|
104 |
return out_path
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
with gr.Blocks() as demo:
|
107 |
gr.Markdown("""
|
108 |
# 小睿語音合成
|
|
|
147 |
為了加速,已關閉自動語音辨識,務必正確輸入語音樣本的文字轉寫。
|
148 |
""")
|
149 |
|
150 |
+
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script, so
    # importing the module does not launch the UI.
    demo.launch()
|