MultiMedTulu

Runtime error

not-lain committed on Nov 9, 2023

Commit

c1f218b

1 Parent(s): e555ad1

switched to filepath audio

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,21 +18,25 @@ seamless_client = Client("facebook/seamless_m4t")
-def process_speech(audio):
     """
     processing sound using seamless_m4t
     """
-    print(f"audio : {audio}")
-    print(f"audio type : {type(audio)}")
-    audio_name = f"{np.random.randint(0, 100)}.wav"
-    sr, data = audio
-    write(audio_name, sr, data.astype(np.int16))
     out = seamless_client.predict(
         "S2TT",
         "file",
         None,
-        audio_name, #audio_name
         "",
         "French",# source language
         "English",# target language
@@ -306,7 +310,7 @@ with gr.Blocks(theme='ParityError/Anime') as iface :
         image_output = gr.Markdown(label="output text")
         image_button = gr.Button("process image")
     with gr.Tab("speech to text translation"):
-        audio_input = gr.Audio(label="talk in french")
         audio_output = gr.Markdown(label="output text")
         audio_button = gr.Button("process audio")
     text_button.click(process_and_query, inputs=text_input, outputs=text_output)

+def process_speech(audio_input):
     """
     processing sound using seamless_m4t
     """
+    print(f"audio : {audio_input}")
+    print(f"audio type : {type(audio_input)}")
+    try :
+        audio_name = f"{np.random.randint(0, 100)}.wav"
+        sr, data = audio_input
+        write(audio_name, sr, data.astype(np.int16))
+        audio_input = audio_name
+    except :
+        pass
     out = seamless_client.predict(
         "S2TT",
         "file",
         None,
+        audio_input, #audio_name
         "",
         "French",# source language
         "English",# target language
         image_output = gr.Markdown(label="output text")
         image_button = gr.Button("process image")
     with gr.Tab("speech to text translation"):
+        audio_input = gr.Audio(label="talk in french",type="filepath",sources="microphone")
         audio_output = gr.Markdown(label="output text")
         audio_button = gr.Button("process audio")
     text_button.click(process_and_query, inputs=text_input, outputs=text_output)