import gradio as gr
import torch
from transformers import pipeline

# 1. Load models (this happens only once, when the app starts).
print("Loading models...")

# ASR pipeline.
# NOTE: we deliberately do NOT pass torch_dtype=torch.float16 here — half
# precision is unsupported for many CPU ops (Whisper's conv layers raise
# "... not implemented for 'Half'"), so CPU inference must use the default
# float32 dtype.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device="cpu",  # Specify CPU device
)

# Direct translation pipelines, keyed by "<source>-<target>" language codes.
# Every language pairs with English, so any missing pair can bridge via "en".
translators = {
    "en-zh": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
    "zh-en": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en"),
    "en-ja": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ja"),
    "ja-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en"),
    "en-ko": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ko"),
    "ko-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en"),
}
print("Models loaded successfully.")


# 2. Define processing functions.
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file to text.

    Args:
        audio_file: Filesystem path to the audio file (Gradio `type="filepath"`),
            or None when nothing was uploaded.

    Returns:
        The transcript string, "" for no input, or an error message string
        (errors are returned rather than raised so Gradio can display them).
    """
    print(f"Received audio file: {audio_file}")
    if audio_file is None:
        return ""
    try:
        # The pipeline accepts a file path directly.
        text = asr_pipeline(audio_file)["text"]
        print(f"ASR result: {text}")
        return text
    except Exception as e:
        print(f"Error in ASR: {e}")
        return f"Error in ASR: {e}"


def translate_text(text, source_lang, target_lang):
    """Translate `text` between two of: "en", "zh", "ja", "ko".

    Uses a direct Helsinki-NLP model when one exists; otherwise bridges
    through English (source -> en -> target).

    Args:
        text: The input text; empty input yields "".
        source_lang: Source language code.
        target_lang: Target language code.

    Returns:
        The translated string, the input unchanged when source == target,
        "Translation route not supported" when no model path exists, or an
        error message string (matching transcribe_audio's error style).
    """
    print(f"Translating '{text}' from {source_lang} to {target_lang}")
    if not text:
        return ""
    if source_lang == target_lang:
        # Identity translation — nothing to do.
        return text

    direct_key = f"{source_lang}-{target_lang}"
    to_en_key = f"{source_lang}-en"
    from_en_key = f"en-{target_lang}"
    try:
        if direct_key in translators:
            # Direct translation is possible.
            translated_text = translators[direct_key](text)[0]["translation_text"]
        elif to_en_key in translators and from_en_key in translators:
            # Bridge translation via English.
            english_text = translators[to_en_key](text)[0]["translation_text"]
            translated_text = translators[from_en_key](english_text)[0]["translation_text"]
        else:
            return "Translation route not supported"
    except Exception as e:
        print(f"Error in translation: {e}")
        return f"Error in translation: {e}"

    print(f"Translation result: {translated_text}")
    return translated_text


# 3. Create the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## All-in-One ASR and Translation API")
    with gr.Tab("ASR"):
        audio_input = gr.Audio(type="filepath", label="Upload Audio")
        asr_output = gr.Textbox(label="Transcript")
        asr_button = gr.Button("Transcribe")
        asr_button.click(
            transcribe_audio,
            inputs=audio_input,
            outputs=asr_output,
            api_name="asr",
        )
    with gr.Tab("Translate"):
        text_input = gr.Textbox(label="Input Text")
        source_lang_input = gr.Dropdown(["en", "zh", "ja", "ko"], label="Source Language")
        target_lang_input = gr.Dropdown(["en", "zh", "ja", "ko"], label="Target Language")
        translation_output = gr.Textbox(label="Translation")
        translate_button = gr.Button("Translate")
        translate_button.click(
            translate_text,
            inputs=[text_input, source_lang_input, target_lang_input],
            outputs=translation_output,
            api_name="translate",
        )

# 4. Launch the app.
if __name__ == "__main__":
    demo.launch()