Spaces:
Sleeping
Sleeping
Mejiro J
committed on
Commit
·
bab7d1c
1
Parent(s):
9166caa
update with corrected waveform processing of stereo
Browse files
- .gitignore +2 -1
- app.py +3 -1
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
venv
|
|
|
|
1 |
+
venv
|
2 |
+
app_local.py
|
app.py
CHANGED
@@ -60,6 +60,8 @@ def text2speech(target_text, game, speaker):
|
|
60 |
|
61 |
prompt_wav, sr = sf.read(f"Reference_Voice/{game}/{speaker}/audio.mp3")
|
62 |
prompt_wav = torch.from_numpy(prompt_wav).float().unsqueeze(0)
|
|
|
|
|
63 |
|
64 |
prompt_text = prompt_text_dict[game][speaker]
|
65 |
|
@@ -155,7 +157,7 @@ if __name__ == "__main__":
|
|
155 |
gr.Markdown("## Text to Speech Generation")
|
156 |
with gr.Row():
|
157 |
game = gr.Dropdown(label="Game", choices=game_choices, value="HonkaiSR")
|
158 |
-
speaker = gr.Dropdown(label="Speaker", choices=speaker_game_dict[game.value], value="
|
159 |
|
160 |
target_text = gr.Textbox(label="Target Text", placeholder="Enter the text you want to convert to speech.")
|
161 |
output_audio = gr.Audio(label="Generated Audio", type="filepath")
|
|
|
60 |
|
61 |
prompt_wav, sr = sf.read(f"Reference_Voice/{game}/{speaker}/audio.mp3")
|
62 |
prompt_wav = torch.from_numpy(prompt_wav).float().unsqueeze(0)
|
63 |
+
if prompt_wav.ndim == 3:
|
64 |
+
prompt_wav = prompt_wav.mean(dim=2)
|
65 |
|
66 |
prompt_text = prompt_text_dict[game][speaker]
|
67 |
|
|
|
157 |
gr.Markdown("## Text to Speech Generation")
|
158 |
with gr.Row():
|
159 |
game = gr.Dropdown(label="Game", choices=game_choices, value="HonkaiSR")
|
160 |
+
speaker = gr.Dropdown(label="Speaker", choices=speaker_game_dict[game.value], value="", allow_custom_value=True)
|
161 |
|
162 |
target_text = gr.Textbox(label="Target Text", placeholder="Enter the text you want to convert to speech.")
|
163 |
output_audio = gr.Audio(label="Generated Audio", type="filepath")
|