Spaces:

kafka23
/

LLASA-ZZZ-Genshin-HSR

Sleeping

Mejiro J committed 9 days ago

Commit

bab7d1c

1 Parent(s): 9166caa

Update with corrected waveform processing of stereo audio

Files changed (2) hide show

.gitignore CHANGED Viewed

app.py CHANGED Viewed

@@ -60,6 +60,8 @@ def text2speech(target_text, game, speaker):
     prompt_wav, sr = sf.read(f"Reference_Voice/{game}/{speaker}/audio.mp3")
     prompt_wav = torch.from_numpy(prompt_wav).float().unsqueeze(0)
     prompt_text = prompt_text_dict[game][speaker]
@@ -155,7 +157,7 @@ if __name__ == "__main__":
         gr.Markdown("## Text to Speech Generation")
         with gr.Row():
             game = gr.Dropdown(label="Game", choices=game_choices, value="HonkaiSR")
-            speaker = gr.Dropdown(label="Speaker", choices=speaker_game_dict[game.value], value="Kafka")
         target_text = gr.Textbox(label="Target Text", placeholder="Enter the text you want to convert to speech.")
         output_audio = gr.Audio(label="Generated Audio", type="filepath")

     prompt_wav, sr = sf.read(f"Reference_Voice/{game}/{speaker}/audio.mp3")
     prompt_wav = torch.from_numpy(prompt_wav).float().unsqueeze(0)
+    if prompt_wav.ndim == 3:
+        prompt_wav = prompt_wav.mean(dim=2)
     prompt_text = prompt_text_dict[game][speaker]
         gr.Markdown("## Text to Speech Generation")
         with gr.Row():
             game = gr.Dropdown(label="Game", choices=game_choices, value="HonkaiSR")
+            speaker = gr.Dropdown(label="Speaker", choices=speaker_game_dict[game.value], value="", allow_custom_value=True)
         target_text = gr.Textbox(label="Target Text", placeholder="Enter the text you want to convert to speech.")
         output_audio = gr.Audio(label="Generated Audio", type="filepath")