kanahomaisa committed on
Commit
2a88d38
·
verified ·
1 Parent(s): 4c4a4de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -1,38 +1,44 @@
1
  import os
2
  import sys
3
  import gradio as gr
4
- import random
5
  import numpy as np
6
  import torch
7
  import librosa
8
  import torchaudio
9
  from scipy.signal import resample
10
  import time
11
- import requests
12
 
13
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
14
  sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
15
 
16
  from cosyvoice.cli.cosyvoice import CosyVoice
17
  from cosyvoice.utils.file_utils import load_wav
18
- from huggingface_hub import snapshot_download
 
 
 
 
 
 
19
 
20
  preset_speakers = {
21
  "6歲": {
22
- "audio_path": "data/smile_train.wav",
23
  "transcription": "名字是微笑號,只是呢你們看,這一輛微笑號它這裡有寫八百型的喔,它是八百山出來"
24
  },
25
  "8歲": {
26
- "audio_path": "data/utensils.wav",
27
  "transcription": "出來了出來了,你知道這個餐具是可以挖的,這個餐具可以用窩課魅一起挖嗎?"
28
  }
29
  }
30
 
31
  def apply_preset(speaker_key):
32
  if speaker_key in preset_speakers:
33
- path = preset_speakers[speaker_key]["audio_path"]
34
- text = preset_speakers[speaker_key]["transcription"]
35
- return path, text
 
36
  return None, ""
37
 
38
  max_val = 0.8
@@ -97,12 +103,6 @@ def synthesize_speech(speaker_audio_path, content_text, speaker_text, speaker_ke
97
  torchaudio.save(out_path, torch.tensor(audio_data).unsqueeze(0), sample_rate=target_sr)
98
  return out_path
99
 
100
- def download_audio_from_url(url, save_path="temp_prompt.wav"):
101
- response = requests.get(url)
102
- with open(save_path, "wb") as f:
103
- f.write(response.content)
104
- return save_path
105
-
106
  with gr.Blocks() as demo:
107
  gr.Markdown("""
108
  # 小睿語音合成
@@ -147,4 +147,5 @@ with gr.Blocks() as demo:
147
  為了加速,已關閉自動語音辨識,務必正確輸入語音樣本的文字轉寫。
148
  """)
149
 
150
- demo.launch()
 
 
1
  import os
2
  import sys
3
  import gradio as gr
 
4
  import numpy as np
5
  import torch
6
  import librosa
7
  import torchaudio
8
  from scipy.signal import resample
9
  import time
10
+ from huggingface_hub import snapshot_download, hf_hub_download
11
 
12
  ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
13
  sys.path.append(f'{ROOT_DIR}/third_party/Matcha-TTS')
14
 
15
  from cosyvoice.cli.cosyvoice import CosyVoice
16
  from cosyvoice.utils.file_utils import load_wav
17
+
18
def download_audio_from_dataset(filename, save_path="temp_prompt.wav"):
    """Fetch one sample file from the preset-voice dataset repository.

    Args:
        filename: Name of the file inside the
            ``kanahomaisa/breezyvoice-samples`` dataset repo.
        save_path: Accepted for call compatibility but not used by this
            function; the result is whatever ``hf_hub_download`` returns.

    Returns:
        The value returned by ``hf_hub_download`` for the requested file
        (the caller treats it as a local file path).
    """
    cached_file = hf_hub_download(
        repo_type="dataset",
        repo_id="kanahomaisa/breezyvoice-samples",
        filename=filename,
    )
    return cached_file
24
 
25
  preset_speakers = {
26
  "6歲": {
27
+ "filename": "smile_train.wav",
28
  "transcription": "名字是微笑號,只是呢你們看,這一輛微笑號它這裡有寫八百型的喔,它是八百山出來"
29
  },
30
  "8歲": {
31
+ "filename": "utensils.wav",
32
  "transcription": "出來了出來了,你知道這個餐具是可以挖的,這個餐具可以用窩課魅一起挖嗎?"
33
  }
34
  }
35
 
36
  def apply_preset(speaker_key):
37
  if speaker_key in preset_speakers:
38
+ filename = preset_speakers[speaker_key]["filename"]
39
+ transcription = preset_speakers[speaker_key]["transcription"]
40
+ local_path = download_audio_from_dataset(filename)
41
+ return local_path, transcription
42
  return None, ""
43
 
44
  max_val = 0.8
 
103
  torchaudio.save(out_path, torch.tensor(audio_data).unsqueeze(0), sample_rate=target_sr)
104
  return out_path
105
 
 
 
 
 
 
 
106
  with gr.Blocks() as demo:
107
  gr.Markdown("""
108
  # 小睿語音合成
 
147
  為了加速,已關閉自動語音辨識,務必正確輸入語音樣本的文字轉寫。
148
  """)
149
 
150
# Launch the Gradio app only when this file is executed as a script, so
# importing the module does not start the server.
if __name__ == "__main__":
    demo.launch()