# Hugging Face Spaces app — ASR + translation demo.
# (Header lines from the HF file viewer — author "AKIRA", commit b3b0b53,
#  "Finalize all local changes", ~3.24 kB — preserved here as a comment.)
import gradio as gr
from transformers import pipeline
import torch
# 1. Load Models (this will happen only once when the app starts)
print("Loading models...")

# ASR Pipeline.
# BUG FIX: the original passed torch_dtype=torch.float16 together with
# device="cpu". Half-precision inference on CPU is unsupported or extremely
# slow for most ops (fp16 kernels target GPU), and Whisper pipelines commonly
# fail with it. Use full precision on CPU.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    torch_dtype=torch.float32,  # fp32: safe/fast path for CPU inference
    device="cpu",  # Specify CPU device
)

# Translation pipelines keyed by "<src>-<tgt>" ISO language-pair codes.
# Only pairs involving English exist; other routes bridge via English
# (see translate_text).
translators = {
    "en-zh": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
    "zh-en": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en"),
    "en-ja": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ja"),
    "ja-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en"),
    "en-ko": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ko"),
    "ko-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en"),
}
print("Models loaded successfully.")
# 2. Define Processing Functions
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with the Whisper ASR pipeline.

    Returns "" when no file was provided; on failure, returns the error
    message as the transcript so it surfaces in the UI instead of crashing.
    """
    print(f"Received audio file: {audio_file}")
    if audio_file is None:
        return ""
    try:
        # asr_pipeline accepts a filesystem path directly.
        transcript = asr_pipeline(audio_file)["text"]
    except Exception as e:
        print(f"Error in ASR: {e}")
        return f"Error in ASR: {e}"
    print(f"ASR result: {transcript}")
    return transcript
def translate_text(text, source_lang, target_lang):
    """Translate *text* from source_lang to target_lang.

    Uses a direct Helsinki-NLP model when one exists for the pair;
    otherwise bridges through English (src -> en -> tgt). Returns "" for
    empty input and the text unchanged when source and target match.
    """
    print(f"Translating '{text}' from {source_lang} to {target_lang}")
    if not text:
        return ""
    # BUG FIX: same-language requests previously either returned
    # "Translation route not supported" (en->en) or did a lossy round-trip
    # through English (e.g. zh->en->zh). Short-circuit to the identity.
    if source_lang == target_lang:
        return text
    pair = f"{source_lang}-{target_lang}"
    # Direct translation if possible
    if pair in translators:
        translated_text = translators[pair](text)[0]['translation_text']
    # Bridge translation via English
    elif source_lang != 'en' and target_lang != 'en':
        english_text = translators[f"{source_lang}-en"](text)[0]['translation_text']
        translated_text = translators[f"en-{target_lang}"](english_text)[0]['translation_text']
    else:
        return "Translation route not supported"
    print(f"Translation result: {translated_text}")
    return translated_text
# 3. Build the Gradio UI: one tab per exposed API endpoint.
with gr.Blocks() as demo:
    gr.Markdown("## All-in-One ASR and Translation API")

    # --- Speech-to-text tab (exposed as api_name="asr") ---
    with gr.Tab("ASR"):
        audio_in = gr.Audio(type="filepath", label="Upload Audio")
        transcript_box = gr.Textbox(label="Transcript")
        run_asr = gr.Button("Transcribe")
        run_asr.click(
            transcribe_audio,
            inputs=audio_in,
            outputs=transcript_box,
            api_name="asr",
        )

    # --- Translation tab (exposed as api_name="translate") ---
    with gr.Tab("Translate"):
        source_text = gr.Textbox(label="Input Text")
        src_lang = gr.Dropdown(["en", "zh", "ja", "ko"], label="Source Language")
        tgt_lang = gr.Dropdown(["en", "zh", "ja", "ko"], label="Target Language")
        result_box = gr.Textbox(label="Translation")
        run_translate = gr.Button("Translate")
        run_translate.click(
            translate_text,
            inputs=[source_text, src_lang, tgt_lang],
            outputs=result_box,
            api_name="translate",
        )
# 4. Launch the App
if __name__ == "__main__":
    # Start the Gradio server; blocks until the process is stopped.
    demo.launch()