Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import gradio as gr
|
4 |
+
import assemblyai as aai
|
5 |
+
from translate import Translator
|
6 |
+
import uuid
|
7 |
+
from gtts import gTTS
|
8 |
+
import tempfile
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
def voice_to_voice(audio_file):
    """Full pipeline: transcribe English speech, translate it, and voice each translation.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the recorded audio (Gradio ``type="filepath"``).

    Returns
    -------
    tuple
        Six audio file paths followed by six translated strings, in the
        language order produced by ``translate_text`` (ru, tr, sv, de, es, ja).
        The order must match ``output_components`` wired up in the UI.

    Raises
    ------
    gr.Error
        If the AssemblyAI transcription fails.
    """
    # Transcribe speech
    transcript = transcribe_audio(audio_file)

    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)
    else:
        transcript = transcript.text

    # Translate text into every target language
    list_translations = translate_text(transcript)

    # Generate speech for each translation; Path objects are what gr.Audio expects
    generated_audio_paths = [
        Path(text_to_speech(translation)) for translation in list_translations
    ]

    # Unpack instead of indexing [0]..[5] twice: same 12-tuple for the fixed
    # six-language list, without the brittle hand-written indices.
    return (*generated_audio_paths, *list_translations)
|
31 |
+
|
32 |
+
# Function to transcribe audio using AssemblyAI
|
33 |
+
# Function to transcribe audio using AssemblyAI
def transcribe_audio(audio_file):
    """Transcribe an audio file with AssemblyAI.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to transcribe.

    Returns
    -------
    aai.Transcript
        The full transcript object; callers check ``.status`` / ``.text``.
    """
    # SECURITY: the API key was hard-coded in source. Prefer the environment;
    # the literal fallback keeps existing deployments working, but this key is
    # published and should be rotated and removed from version control.
    aai.settings.api_key = os.environ.get(
        "ASSEMBLYAI_API_KEY", "21f30361d02543cca65707e8f71721d8"
    )

    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_file)

    return transcript
|
40 |
+
|
41 |
+
# Function to translate text
|
42 |
+
# Function to translate text
def translate_text(text: str) -> list[str]:
    """Translate English *text* into the six target languages.

    Parameters
    ----------
    text : str
        English source text.

    Returns
    -------
    list[str]
        Translations in fixed order: Russian, Turkish, Swedish, German,
        Spanish, Japanese. (Original annotation said ``str`` — it has always
        returned a list; the UI relies on this order.)
    """
    languages = ["ru", "tr", "sv", "de", "es", "ja"]
    list_translations = []

    for lan in languages:
        # One Translator instance per language pair
        translator = Translator(from_lang="en", to_lang=lan)
        list_translations.append(translator.translate(text))

    return list_translations
|
52 |
+
|
53 |
+
# Function to generate speech with gTTS (Google Text-to-Speech)
|
54 |
+
# Function to generate speech with gTTS (Google Text-to-Speech)
def text_to_speech(text: str, lang: str = "en") -> str:
    """Synthesize *text* to an MP3 file and return its path.

    Parameters
    ----------
    text : str
        Text to speak.
    lang : str, optional
        gTTS language code. Defaults to ``"en"`` (the previous hard-coded
        value) for backward compatibility; pass the translation's language
        code so non-English text is voiced with the right accent/engine.

    Returns
    -------
    str
        Path to a temporary ``.mp3`` file (not auto-deleted; Gradio reads it).
    """
    tts = gTTS(text=text, lang=lang, slow=True)

    # Save the audio to a temporary file; delete=False so the file survives
    # the context manager and can be served by Gradio afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name

    return audio_path
|
64 |
+
|
65 |
+
# NOTE(review): this module-level `input_audio` appears unused — the Blocks UI
# below constructs its own, separate `audio_input` with identical settings.
# Kept to avoid changing the module's public names; confirm and remove.
input_audio = gr.Audio(
    sources=["microphone"],          # record from the browser microphone
    type="filepath",                 # hand callbacks a path on disk, not raw samples
    show_download_button=True,
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)
|
76 |
+
|
77 |
+
# --- Gradio UI -------------------------------------------------------------
# Layout: one input column (mic recorder + Submit/Clear), then two rows of
# three audio+text output groups, one per target language.
with gr.Blocks() as demo:
    gr.Markdown("## Echo: Voice Translation App")
    gr.Markdown("## Record yourself in English and immediately receive voice translations.")
    with gr.Row():
        with gr.Column():
            # Microphone recorder; "filepath" passes voice_to_voice a file path.
            audio_input = gr.Audio(sources=["microphone"],
                                   type="filepath",
                                   show_download_button=True,
                                   waveform_options=gr.WaveformOptions(
                                       waveform_color="#01C6FF",
                                       waveform_progress_color="#0066B4",
                                       skip_length=2,
                                       show_controls=False,
                                   ),)
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                btn = gr.ClearButton(audio_input, "Clear")

    # First output row: Turkish, Swedish, Russian.
    with gr.Row():
        with gr.Group() as turkish:
            tr_output = gr.Audio(label="Turkish", interactive=False)
            tr_text = gr.Markdown()

        with gr.Group() as swedish:
            sv_output = gr.Audio(label="Swedish", interactive=False)
            sv_text = gr.Markdown()

        with gr.Group() as russian:
            ru_output = gr.Audio(label="Russian", interactive=False)
            ru_text = gr.Markdown()

    # Second output row: German, Spanish, Japanese.
    with gr.Row():
        with gr.Group():
            de_output = gr.Audio(label="German", interactive=False)
            de_text = gr.Markdown()

        with gr.Group():
            es_output = gr.Audio(label="Spanish", interactive=False)
            es_text = gr.Markdown()

        with gr.Group():
            jp_output = gr.Audio(label="Japanese", interactive=False)
            jp_text = gr.Markdown()

    # Order matters: must match the 12-tuple returned by voice_to_voice,
    # i.e. translate_text's language order (ru, tr, sv, de, es, ja) — first
    # the six audio paths, then the six translated strings.
    output_components = [ru_output, tr_output, sv_output, de_output, es_output, jp_output, ru_text, tr_text, sv_text, de_text, es_text, jp_text]
    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

if __name__ == "__main__":
    demo.launch()
|