Mejiro J committed on
Commit
bab7d1c
·
1 Parent(s): 9166caa

Update with corrected waveform processing of stereo audio

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. app.py +3 -1
.gitignore CHANGED
@@ -1 +1,2 @@
1
- venv
 
 
1
+ venv
2
+ app_local.py
app.py CHANGED
@@ -60,6 +60,8 @@ def text2speech(target_text, game, speaker):
60
 
61
  prompt_wav, sr = sf.read(f"Reference_Voice/{game}/{speaker}/audio.mp3")
62
  prompt_wav = torch.from_numpy(prompt_wav).float().unsqueeze(0)
 
 
63
 
64
  prompt_text = prompt_text_dict[game][speaker]
65
 
@@ -155,7 +157,7 @@ if __name__ == "__main__":
155
  gr.Markdown("## Text to Speech Generation")
156
  with gr.Row():
157
  game = gr.Dropdown(label="Game", choices=game_choices, value="HonkaiSR")
158
- speaker = gr.Dropdown(label="Speaker", choices=speaker_game_dict[game.value], value="Kafka")
159
 
160
  target_text = gr.Textbox(label="Target Text", placeholder="Enter the text you want to convert to speech.")
161
  output_audio = gr.Audio(label="Generated Audio", type="filepath")
 
60
 
61
  prompt_wav, sr = sf.read(f"Reference_Voice/{game}/{speaker}/audio.mp3")
62
  prompt_wav = torch.from_numpy(prompt_wav).float().unsqueeze(0)
63
+ if prompt_wav.ndim == 3:
64
+ prompt_wav = prompt_wav.mean(dim=2)
65
 
66
  prompt_text = prompt_text_dict[game][speaker]
67
 
 
157
  gr.Markdown("## Text to Speech Generation")
158
  with gr.Row():
159
  game = gr.Dropdown(label="Game", choices=game_choices, value="HonkaiSR")
160
+ speaker = gr.Dropdown(label="Speaker", choices=speaker_game_dict[game.value], value="", allow_custom_value=True)
161
 
162
  target_text = gr.Textbox(label="Target Text", placeholder="Enter the text you want to convert to speech.")
163
  output_audio = gr.Audio(label="Generated Audio", type="filepath")