kanahomaisa committed on
Commit
3a37013
·
verified ·
1 Parent(s): ae3a712

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -7,7 +7,9 @@ import librosa
7
  import torchaudio
8
  from scipy.signal import resample
9
  import time
10
- from huggingface_hub import snapshot_download, hf_hub_download
 
 
11
 
12
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
13
  sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
@@ -15,29 +17,31 @@ sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
15
  from cosyvoice.cli.cosyvoice import CosyVoice
16
  from cosyvoice.utils.file_utils import load_wav
17
 
18
- def download_audio_from_dataset(filename, save_path="temp_prompt.wav"):
19
- return hf_hub_download(
20
- repo_id="kanahomaisa/breezyvoice-samples",
21
- filename=filename,
22
- repo_type="dataset"
23
- )
 
 
24
 
25
  preset_speakers = {
26
  "6歲": {
27
- "filename": "smile_train.wav",
28
  "transcription": "名字是微笑號,只是呢你們看,這一輛微笑號它這裡有寫八百型的喔,它是八百山出來"
29
  },
30
  "8歲": {
31
- "filename": "utensils.wav",
32
  "transcription": "出來了出來了,你知道這個餐具是可以挖的,這個餐具可以用窩課魅一起挖嗎?"
33
  }
34
  }
35
 
36
  def apply_preset(speaker_key):
37
  if speaker_key in preset_speakers:
38
- filename = preset_speakers[speaker_key]["filename"]
39
  transcription = preset_speakers[speaker_key]["transcription"]
40
- local_path = download_audio_from_dataset(filename)
41
  return local_path, transcription
42
  return None, ""
43
 
@@ -107,9 +111,8 @@ with gr.Blocks() as demo:
107
  gr.Markdown("""
108
  # 小睿語音合成
109
  選擇預設語音 (6歲、8歲) 或上傳5~15秒語音樣本及該則語音樣本的轉寫,並輸入要合成的句子。
110
-
111
  視該時段運算資源和語句長度而定約需70至2000秒,若等候時間過長,可改用較快但較不穩的版本:
112
- https://colab.research.google.com/drive/1_p1NLN5tzBwu92ZftPn5vr0U683pTxSY?usp=sharing
113
  """)
114
 
115
  speaker_selector = gr.Dropdown(
 
7
  import torchaudio
8
  from scipy.signal import resample
9
  import time
10
+ import requests
11
+
12
+ from huggingface_hub import snapshot_download
13
 
14
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
15
  sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
 
17
  from cosyvoice.cli.cosyvoice import CosyVoice
18
  from cosyvoice.utils.file_utils import load_wav
19
 
20
+ def download_audio_from_drive(file_id, save_path="temp_prompt.wav"):
21
+ url = f"https://drive.google.com/uc?export=download&id={file_id}"
22
+ response = requests.get(url)
23
+ if not response.ok or b"<html" in response.content[:100]:
24
+ raise RuntimeError("Google Drive 音檔下載失敗")
25
+ with open(save_path, "wb") as f:
26
+ f.write(response.content)
27
+ return save_path
28
 
29
  preset_speakers = {
30
  "6歲": {
31
+ "file_id": "1PomN05sTjxXBR3q5kOgB7CUEpig2HJmx",
32
  "transcription": "名字是微笑號,只是呢你們看,這一輛微笑號它這裡有寫八百型的喔,它是八百山出來"
33
  },
34
  "8歲": {
35
+ "file_id": "1U1DKzN_BsU-DL02HZIRlx-opUJLhaQ41",
36
  "transcription": "出來了出來了,你知道這個餐具是可以挖的,這個餐具可以用窩課魅一起挖嗎?"
37
  }
38
  }
39
 
40
  def apply_preset(speaker_key):
41
  if speaker_key in preset_speakers:
42
+ file_id = preset_speakers[speaker_key]["file_id"]
43
  transcription = preset_speakers[speaker_key]["transcription"]
44
+ local_path = download_audio_from_drive(file_id)
45
  return local_path, transcription
46
  return None, ""
47
 
 
111
  gr.Markdown("""
112
  # 小睿語音合成
113
  選擇預設語音 (6歲、8歲) 或上傳5~15秒語音樣本及該則語音樣本的轉寫,並輸入要合成的句子。
 
114
  視該時段運算資源和語句長度而定約需70至2000秒,若等候時間過長,可改用較快但較不穩的版本:
115
+ https://colab.research.google.com/drive/1aQSLwzbK9QnNMH4A5eVfqFYfPFXHwlXx?usp=sharing
116
  """)
117
 
118
  speaker_selector = gr.Dropdown(