import gradio as gr
from transformers import pipeline
import torch
# 1. Load models (this happens only once, when the app starts)
print("Loading models...")

# ASR pipeline. float16 speeds up GPU inference, but most half-precision ops
# are not implemented on CPU, so pick the dtype to match the device.
device = "cuda" if torch.cuda.is_available() else "cpu"
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device=device,
)
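
# Note: Whisper operates on 30-second windows, so long recordings may be
# truncated depending on the transformers version. One option (a sketch,
# assuming the chunked-inference support documented for the ASR pipeline)
# is to rebuild the pipeline with chunking enabled:
#   asr_pipeline = pipeline(
#       "automatic-speech-recognition",
#       model="openai/whisper-small",
#       chunk_length_s=30,
#   )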
# Translation pipelines (one MarianMT model per direction)
translators = {
    "en-zh": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
    "zh-en": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en"),
    "en-ja": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ja"),
    "ja-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en"),
    "en-ko": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ko"),
    "ko-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en"),
}
print("Models loaded successfully.")
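
# Note: loading all six translation models up front keeps request latency low
# but costs RAM. If memory is tight, a lazy-loading sketch (hypothetical
# helper, not wired into the code below) could cache each model on first use:
#   from functools import lru_cache
#   @lru_cache(maxsize=None)
#   def get_translator(route):  # route like "en-zh"
#       return pipeline("translation", model=f"Helsinki-NLP/opus-mt-{route}")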
# 2. Define processing functions
def transcribe_audio(audio_file):
    print(f"Received audio file: {audio_file}")
    if audio_file is None:
        return ""
    try:
        # The ASR pipeline accepts a file path directly
        text = asr_pipeline(audio_file)["text"]
        print(f"ASR result: {text}")
        return text
    except Exception as e:
        print(f"Error in ASR: {e}")
        return f"Error in ASR: {e}"
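
# Note: returning the error string displays it in the output Textbox. To show
# a proper error popup in the UI instead, one option is to re-raise it from
# the except block as `raise gr.Error(str(e))`.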
def translate_text(text, source_lang, target_lang):
    print(f"Translating '{text}' from {source_lang} to {target_lang}")
    if not text:
        return ""
    route = f"{source_lang}-{target_lang}"
    if source_lang == target_lang:
        # Nothing to translate; avoid a pointless round-trip through English
        translated_text = text
    elif route in translators:
        # Direct translation if a model for this pair exists
        translated_text = translators[route](text)[0]["translation_text"]
    elif source_lang != "en" and target_lang != "en":
        # Bridge translation via English (e.g. zh -> en -> ja)
        english_text = translators[f"{source_lang}-en"](text)[0]["translation_text"]
        translated_text = translators[f"en-{target_lang}"](english_text)[0]["translation_text"]
    else:
        return "Translation route not supported"
    print(f"Translation result: {translated_text}")
    return translated_text
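
# Example routes: translate_text("Hello", "en", "zh") uses the direct en-zh
# model, while translate_text("こんにちは", "ja", "ko") has no direct model
# in the dict above and therefore bridges ja -> en -> ko.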
# 3. Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## All-in-One ASR and Translation API")

    with gr.Tab("ASR"):
        audio_input = gr.Audio(type="filepath", label="Upload Audio")
        asr_output = gr.Textbox(label="Transcript")
        asr_button = gr.Button("Transcribe")
        asr_button.click(
            transcribe_audio,
            inputs=audio_input,
            outputs=asr_output,
            api_name="asr",
        )

    with gr.Tab("Translate"):
        text_input = gr.Textbox(label="Input Text")
        source_lang_input = gr.Dropdown(["en", "zh", "ja", "ko"], label="Source Language")
        target_lang_input = gr.Dropdown(["en", "zh", "ja", "ko"], label="Target Language")
        translation_output = gr.Textbox(label="Translation")
        translate_button = gr.Button("Translate")
        translate_button.click(
            translate_text,
            inputs=[text_input, source_lang_input, target_lang_input],
            outputs=translation_output,
            api_name="translate",
        )
# 4. Launch the app
if __name__ == "__main__":
    demo.launch()
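
# Because both click handlers set api_name, the endpoints are also callable
# programmatically once the Space is running. A minimal client sketch
# (assumptions: the gradio_client package and a placeholder Space id
# "user/asr-translate"):
#   from gradio_client import Client, handle_file
#   client = Client("user/asr-translate")
#   transcript = client.predict(handle_file("sample.wav"), api_name="/asr")
#   translation = client.predict("Hello world", "en", "zh", api_name="/translate")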