Spaces:
Running
Running
ZeroGPU XTTS
Browse files
- app/models.py +26 -12
- test_tts_xtts.py +18 -11
app/models.py
CHANGED
|
@@ -26,6 +26,7 @@ AVAILABLE_MODELS = {
|
|
| 26 |
# '<keyname>':'<Space URL>'
|
| 27 |
# gradio version that works with most spaces: 4.29
|
| 28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
|
|
|
| 29 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 30 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 31 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
@@ -109,13 +110,21 @@ AVAILABLE_MODELS = {
|
|
| 109 |
|
| 110 |
HF_SPACES = {
|
| 111 |
# XTTS v2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
'coqui/xtts': {
|
| 113 |
'name': 'XTTS v2',
|
| 114 |
-
'function': '1',
|
| 115 |
-
'text_param_index': 0,
|
| 116 |
-
'return_audio_index': 1,
|
| 117 |
'series': 'XTTS',
|
| 118 |
-
'emoji': '😩', # old gradio
|
| 119 |
},
|
| 120 |
|
| 121 |
# WhisperSpeech
|
|
@@ -238,7 +247,8 @@ HF_SPACES = {
|
|
| 238 |
'return_audio_index': 0,
|
| 239 |
'is_closed_source': True,
|
| 240 |
'series': 'Edge TTS',
|
| 241 |
-
'emoji': '…', # NOTE(review): original emoji glyph lost in page extraction — confirm against repo history
|
|
|
|
| 242 |
},
|
| 243 |
|
| 244 |
# Fish Speech
|
|
@@ -468,13 +478,17 @@ DEFAULT_VOICE_PROMPT = "female voice; very clear audio"
|
|
| 468 |
|
| 469 |
# Older gradio spaces use unnamed parameters, both types are valid
|
| 470 |
OVERRIDE_INPUTS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
'coqui/xtts': {
|
| 472 |
-
1: 'en',
| 473 |
-
2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
| 474 |
-
3: None, # mic voice sample
|
| 475 |
-
4: False, #use_mic
|
| 476 |
-
5: False, #cleanup_reference
|
| 477 |
-
6: False, #auto_detect
|
| 478 |
},
|
| 479 |
'collabora/WhisperSpeech': {
|
| 480 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
|
@@ -866,7 +880,7 @@ def make_link_to_space(model_name, for_leaderboard=False):
|
|
| 866 |
emoji = HF_SPACES[model_name]['emoji']
|
| 867 |
except:
|
| 868 |
pass
|
| 869 |
-
return emoji +' <a target="_blank" style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_basename +'</a>'
|
| 870 |
|
| 871 |
# otherwise just return without emoji
|
| 872 |
return '<span style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_name +'</span>'
|
|
|
|
| 26 |
# '<keyname>':'<Space URL>'
|
| 27 |
# gradio version that works with most spaces: 4.29
|
| 28 |
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32; extra_headers error appears for 5.13+
|
| 29 |
+
'coqui/xtts': 'tonyassi/voice-clone', # ZeroGPU clone
|
| 30 |
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
|
|
|
| 110 |
|
| 111 |
HF_SPACES = {
|
| 112 |
# XTTS v2
|
| 113 |
+
# 'coqui/xtts': {
|
| 114 |
+
# 'name': 'XTTS v2',
|
| 115 |
+
# 'function': '1',
|
| 116 |
+
# 'text_param_index': 0,
|
| 117 |
+
# 'return_audio_index': 1,
|
| 118 |
+
# 'series': 'XTTS',
|
| 119 |
+
# 'emoji': '😩', # old gradio
|
| 120 |
+
# },
|
| 121 |
+
# tonyassi ZeroGPU XTTS v2
|
| 122 |
'coqui/xtts': {
|
| 123 |
'name': 'XTTS v2',
|
| 124 |
+
'function': '/predict',
|
| 125 |
+
'text_param_index': 'text',
|
| 126 |
+
'return_audio_index': 0,
|
| 127 |
'series': 'XTTS',
|
|
|
|
| 128 |
},
|
| 129 |
|
| 130 |
# WhisperSpeech
|
|
|
|
| 247 |
'return_audio_index': 0,
|
| 248 |
'is_closed_source': True,
|
| 249 |
'series': 'Edge TTS',
|
| 250 |
+
'emoji': '', # api disabled
|
| 251 |
+
'space_link': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
| 252 |
},
|
| 253 |
|
| 254 |
# Fish Speech
|
|
|
|
| 478 |
|
| 479 |
# Older gradio spaces use unnamed parameters, both types are valid
|
| 480 |
OVERRIDE_INPUTS = {
|
| 481 |
+
# 'coqui/xtts': {
|
| 482 |
+
# 1: 'en',
|
| 483 |
+
# 2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
| 484 |
+
# 3: None, # mic voice sample
|
| 485 |
+
# 4: False, #use_mic
|
| 486 |
+
# 5: False, #cleanup_reference
|
| 487 |
+
# 6: False, #auto_detect
|
| 488 |
+
# },
|
| 489 |
+
# tonyassi ZeroGPU space of XTTS:
|
| 490 |
'coqui/xtts': {
|
| 491 |
+
'audio': DEFAULT_VOICE_SAMPLE, # voice sample
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
},
|
| 493 |
'collabora/WhisperSpeech': {
|
| 494 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
|
|
|
| 880 |
emoji = HF_SPACES[model_name]['emoji']
|
| 881 |
except:
|
| 882 |
pass
|
| 883 |
+
return (emoji +' <a target="_blank" style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_basename +'</a>').strip()
|
| 884 |
|
| 885 |
# otherwise just return without emoji
|
| 886 |
return '<span style="'+ style +'" title="'+ title +'" href="'+ space_link +'">'+ model_name +'</span>'
|
test_tts_xtts.py
CHANGED
|
@@ -1,17 +1,24 @@
|
|
| 1 |
import os
|
| 2 |
-
from gradio_client import Client
|
| 3 |
|
| 4 |
-
client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'))
|
|
|
|
| 5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
| 6 |
# print(endpoints)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
result = client.predict(
|
| 8 |
-
"Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
| 9 |
-
'en', #lang
|
| 10 |
-
'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
| 11 |
-
None, # mic voice sample
|
| 12 |
-
False, #use_mic
|
| 13 |
-
False, #cleanup_reference
|
| 14 |
-
False, #auto_detect
|
| 15 |
-
True, #ToS
|
| 16 |
-
fn_index=1
|
| 17 |
)
|
|
|
|
| 1 |
import os
|
| 2 |
+
from gradio_client import Client, handle_file
|
| 3 |
|
| 4 |
+
# client = Client("coqui/xtts", hf_token=os.getenv('HF_TOKEN'), headers={})
|
| 5 |
+
client = Client("tonyassi/voice-clone", hf_token=os.getenv('HF_TOKEN'), headers={})
|
| 6 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
| 7 |
# print(endpoints)
|
| 8 |
+
# result = client.predict(
|
| 9 |
+
# "Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
| 10 |
+
# 'en', #lang
|
| 11 |
+
# 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav', # voice sample
|
| 12 |
+
# None, # mic voice sample
|
| 13 |
+
# False, #use_mic
|
| 14 |
+
# False, #cleanup_reference
|
| 15 |
+
# False, #auto_detect
|
| 16 |
+
# True, #ToS
|
| 17 |
+
# fn_index=1
|
| 18 |
+
# )
|
| 19 |
+
# tony's space
|
| 20 |
result = client.predict(
|
| 21 |
+
text="Quick test.", # str in 'What should I say!? (max 512 characters).' Textbox component
|
| 22 |
+
audio=handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'), # voice sample
|
| 23 |
+
api_name="/predict"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
)
|