AKIRA committed
Commit ad71343 · 1 Parent(s): b20b423

feat: Optimize ASR speed with ONNX and audio compression

Files changed (3):
  1. app.py +152 -101
  2. pages/index/index.js +81 -167
  3. requirements.txt +7 -0
app.py CHANGED
@@ -1,125 +1,176 @@
  import gradio as gr
- from transformers import pipeline
  import torch

- # 1. Load Models (this will happen only once when the app starts)
- print("Loading models...")

- # ASR Pipeline
- asr_pipeline = pipeline(
-     "automatic-speech-recognition",
-     model="openai/whisper-small",
-     torch_dtype=torch.float16,  # Use float16 for faster inference
-     device="cpu"  # Specify CPU device
- )

- # Translation Pipelines
  translators = {
      "en-zh": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
-     "zh-en": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en"),
-     "en-ja": pipeline("translation", model="Helsinki-NLP/opus-mt-en-jap"),  # Corrected model name
      "ja-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en"),
-     "en-ko": pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-ko"),  # Corrected model name
      "ko-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en"),
  }

- print("Models loaded successfully.")
-
- # 2. Define Processing Functions
-
- def transcribe_audio(audio_file):
-     print(f"Received audio_file (binary): {audio_file}")
-     if audio_file is None:
-         print("Audio file is None.")
-         return ""
      try:
-         import tempfile
-         import soundfile as sf
-
-         # Gradio passes binary data as a file-like object or a path to a temp file.
-         # If it's a path, use it directly. If it's binary data, write it to a temp file.
-         if isinstance(audio_file, str) and os.path.exists(audio_file):
-             audio_path = audio_file
-         else:
-             # Assume it's binary data (bytes or similar)
-             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
-                 tmp_file.write(audio_file)
-                 audio_path = tmp_file.name
-             print(f"Wrote binary data to temp file: {audio_path}")
-
-         print(f"Audio file path exists: {os.path.exists(audio_path)}")
-         print(f"Audio file size: {os.path.getsize(audio_path)} bytes")
-
-         # The pipeline expects a file path
-         text = asr_pipeline(audio_path)["text"]
-         print(f"ASR result: {text}")
-
-         # Clean up temporary file
-         if not isinstance(audio_file, str) or not os.path.exists(audio_file):
-             os.remove(audio_path)
-
-         return text
      except Exception as e:
-         print(f"Error in ASR: {e}")
-         import traceback
-         traceback.print_exc()
-         return f"Error in ASR: {e}"

  def translate_text(text, source_lang, target_lang):
-     print(f"Translating '{text}' from {source_lang} to {target_lang}")
-     if not text:
-         return ""
-
-     # Direct translation if possible
-     if f"{source_lang}-{target_lang}" in translators:
-         translator = translators[f"{source_lang}-{target_lang}"]
-         translated_text = translator(text)[0]['translation_text']
-     # Bridge translation via English
-     elif source_lang != 'en' and target_lang != 'en':
-         to_english_translator = translators[f"{source_lang}-en"]
-         english_text = to_english_translator(text)[0]['translation_text']
-
-         from_english_translator = translators[f"en-{target_lang}"]
-         translated_text = from_english_translator(english_text)[0]['translation_text']
-     else:
-         return "Translation route not supported"
-
-     print(f"Translation result: {translated_text}")
-     return translated_text
-
- # 3. Create Gradio Interface
- # Define ASR Interface
- asr_interface = gr.Interface(
-     fn=transcribe_audio,
-     inputs=gr.File(label="Audio Input"),
-     outputs="text",
-     title="ASR API",
-     description="Transcribe audio to text."
- )
-
- # Define Translation Interface
- translate_interface = gr.Interface(
-     fn=translate_text,
-     inputs=[
-         gr.Textbox(label="Input Text"),
-         gr.Dropdown(["en", "zh", "ja", "ko"], label="Source Language"),
-         gr.Dropdown(["en", "zh", "ja", "ko"], label="Target Language")
-     ],
-     outputs="text",
-     title="Translation API",
-     description="Translate text between supported languages."
- )
-
- # Combine interfaces into a Blocks app for multiple API endpoints
- with gr.Blocks() as demo:
-     gr.Markdown("## All-in-One ASR and Translation API")
-
-     with gr.Tab("ASR"):
-         asr_interface.render()
-
-     with gr.Tab("Translate"):
-         translate_interface.render()

- # 4. Launch the App
  if __name__ == "__main__":
-     demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
 
  import gradio as gr
+ from transformers import pipeline, AutoProcessor
+ from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
  import torch
+ import os
+ import base64
+ import tempfile
+ from fastapi import FastAPI, Request
+ from fastapi.responses import JSONResponse
+ import uvicorn
+ import deepl
+ from dotenv import load_dotenv
+ import soundfile as sf
+
+ # --- Load environment variables and initialize DeepL ---
+ load_dotenv()
+
+ DEEPL_AUTH_KEY = os.getenv("DEEPL_AUTH_KEY")
+ deepl_translator = None
+ if DEEPL_AUTH_KEY:
+     try:
+         deepl_translator = deepl.Translator(DEEPL_AUTH_KEY)
+         print("DeepL translator initialized successfully.")
+     except Exception as e:
+         print(f"Error initializing DeepL translator: {e}")
+         print("DeepL will be unavailable.")
+ else:
+     print("DEEPL_AUTH_KEY not found. DeepL will be unavailable.")
+ # --- End ---
+
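Since configuration is read with python-dotenv, running the Space locally only needs a .env file beside app.py. A minimal sketch with a placeholder value (DEEPL_AUTH_KEY is the only variable the app reads):

# .env (placeholder value, not a real key)
DEEPL_AUTH_KEY=your-deepl-api-key-here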
 
+ # 1. Load Models
+ print("Loading all models... This will take some time on startup.")

+ # ASR Model - Using a CPU-optimized ONNX model for speed
+ print("Loading optimized ASR model...")
+ asr_model_id = "openai/whisper-base"

+ # Load the model and processor using Optimum for ONNX Runtime acceleration
+ asr_model = ORTModelForSpeechSeq2Seq.from_pretrained(asr_model_id, provider="CPUExecutionProvider")
+ asr_processor = AutoProcessor.from_pretrained(asr_model_id)
+ print("Optimized ASR model loaded.")
+
+
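Note that openai/whisper-base ships PyTorch weights only, so depending on the installed Optimum version the checkpoint may need an explicit ONNX export. A hedged sketch (export=True is the flag in current Optimum releases; older ones used from_transformers=True):

from optimum.onnxruntime import ORTModelForSpeechSeq2Seq

# Convert the PyTorch checkpoint to ONNX on first load, then cache the
# exported graph so later startups skip the conversion step.
onnx_model = ORTModelForSpeechSeq2Seq.from_pretrained(
    "openai/whisper-base",
    export=True,                      # assumed flag: triggers the ONNX export
    provider="CPUExecutionProvider",  # run inference on CPU via ONNX Runtime
)
onnx_model.save_pretrained("./whisper-base-onnx")  # reload later with from_pretrained("./whisper-base-onnx")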
+ # Translation Pipelines - Reverting to the 6 core, reliable models
  translators = {
      "en-zh": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
+     "zh-en": pipeline("translation", model="Varine/opus-mt-zh-en-model"),
+     "en-ja": pipeline("translation", model="staka/fugumt-en-ja"),
      "ja-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en"),
+     "en-ko": pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-ko"),
      "ko-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en"),
  }

+ print("All models loaded successfully.")
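Each of these pipelines returns a list with one dict per input, which is why every call site below indexes [0]['translation_text']. A quick sketch of the return shape (output text illustrative, not a real run):

result = translators["en-zh"]("Hello, world")
# result == [{'translation_text': '...'}]
translated = result[0]['translation_text']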
 
+ # 2. Define Core Logic Functions
+ def transcribe_audio(audio_bytes):
      try:
+         # Use a temporary file to handle the audio bytes
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+             tmp_file.write(audio_bytes)
+             audio_path = tmp_file.name
+
+         # Read the audio file and process it
+         audio_input, sample_rate = sf.read(audio_path)

+         # Ensure the audio is in the correct format (mono, 16 kHz)
+         if audio_input.ndim > 1:
+             audio_input = audio_input.mean(axis=1)  # to mono
+         if sample_rate != 16000:
+             # Placeholder for resampling; for now the frontend is assumed to send 16 kHz audio.
+             pass
+
+         # Process audio and generate token IDs
+         input_features = asr_processor(audio_input, sampling_rate=16000, return_tensors="pt").input_features
+         predicted_ids = asr_model.generate(input_features)

+         # Decode the token IDs to text
+         text = asr_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

+         os.remove(audio_path)
+         return text, None
      except Exception as e:
+         # Clean up the temp file in case of an error
+         if 'audio_path' in locals() and os.path.exists(audio_path):
+             os.remove(audio_path)
+         return None, str(e)
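The resampling branch above is a deliberate no-op because the Mini Program records at 16 kHz. If other clients were ever allowed, one way to fill it in is polyphase resampling; a sketch assuming scipy were added as a dependency (this commit does not add it):

from scipy.signal import resample_poly

# Convert arbitrary-rate input to the 16 kHz Whisper expects.
if sample_rate != 16000:
    audio_input = resample_poly(audio_input, up=16000, down=sample_rate)
    sample_rate = 16000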
 
  def translate_text(text, source_lang, target_lang):
+     if not text or not source_lang or not target_lang:
+         return "", None
+     if source_lang == target_lang:
+         return text, None
+
+     # --- DeepL Hybrid Logic ---
+     if source_lang == 'zh' and target_lang == 'ja' and deepl_translator:
+         print("Attempting translation with DeepL for zh -> ja")
+         try:
+             result = deepl_translator.translate_text(text, target_lang="JA")
+             return result.text, None
+         except Exception as e:
+             print(f"DeepL API call failed: {e}. Falling back to Hugging Face model.")
+     # --- End ---
+
+     key = f"{source_lang}-{target_lang}"
+     try:
+         if key in translators:
+             return translators[key](text)[0]['translation_text'], None

+         elif source_lang != 'en' and target_lang != 'en':
+             if f"{source_lang}-en" not in translators or f"en-{target_lang}" not in translators:
+                 return None, f"Bridge translation route not supported: {source_lang}-en or en-{target_lang}"
+
+             print(f"Performing bridge translation: {source_lang} -> en -> {target_lang}")
+             english_text = translators[f"{source_lang}-en"](text)[0]['translation_text']
+             return translators[f"en-{target_lang}"](english_text)[0]['translation_text'], None
+         else:
+             return None, f"Translation route not supported: {key}"
+     except Exception as e:
+         return None, str(e)
+
+ # 3. Create FastAPI App
124
+ app = FastAPI()
125
+
126
+ # 4. Define API Endpoints with FastAPI
127
+ @app.post("/api/asr")
128
+ async def api_asr(request: Request):
129
+ json_data = await request.json()
130
+ audio_data_uri = json_data.get('audio_data_uri')
131
+ if not audio_data_uri:
132
+ return JSONResponse(status_code=400, content={"error": "No audio_data_uri provided"})
133
+ try:
134
+ header, encoded = audio_data_uri.split(",", 1)
135
+ audio_bytes = base64.b64decode(encoded)
136
+ transcript, error = transcribe_audio(audio_bytes)
137
+ if error:
138
+ return JSONResponse(status_code=500, content={"error": f"ASR Error: {error}"})
139
+ return JSONResponse(status_code=200, content={"transcript": transcript})
140
+ except Exception as e:
141
+ return JSONResponse(status_code=500, content={"error": f"Server error: {e}"})
142
+
143
+ @app.post("/api/translate")
144
+ async def api_translate(request: Request):
145
+ json_data = await request.json()
146
+ text = json_data.get('text')
147
+ source_lang = json_data.get('source_lang')
148
+ target_lang = json_data.get('target_lang')
149
+ if not all([text, source_lang, target_lang]):
150
+ return JSONResponse(status_code=400, content={"error": "Missing parameters"})
151
 
152
+ translated_text, error = translate_text(text, source_lang, target_lang)
153
+ if error:
154
+ return JSONResponse(status_code=500, content={"error": error})
155
+ return JSONResponse(status_code=200, content={"translated_text": translated_text})
156
+
157
+ # 5. Create a simple Gradio UI for debugging (Optional)
158
+ def gradio_asr(audio_file):
159
+ if audio_file is None:
160
+ return ""
161
+ # Gradio provides a file object, read its bytes
162
+ audio_input, sample_rate = sf.read(audio_file.name)
163
+ # Process audio and generate token IDs
164
+ input_features = asr_processor(audio_input, sampling_rate=sample_rate, return_tensors="pt").input_features
165
+ predicted_ids = asr_model.generate(input_features)
166
+ # Decode the token IDs to text
167
+ transcript = asr_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
168
+ return transcript
169
+
170
+ gradio_ui = gr.Interface(fn=gradio_asr, inputs=gr.Audio(type="filepath"), outputs="text", title="ASR Debugger")
171
 
172
+ # 6. Mount Gradio app onto FastAPI
173
+ app = gr.mount_gradio_app(app, gradio_ui, path="/")
174
 
 
175
  if __name__ == "__main__":
176
+ uvicorn.run(app, host="0.0.0.0", port=7860)
pages/index/index.js CHANGED
@@ -1,235 +1,149 @@

  Page({
    data: {
-     // Updated language codes to match Hugging Face API
      languages: {
        'zh': { name: '中文', flag: 'cn' },
        'en': { name: 'English', flag: 'us' },
        'ja': { name: '日本語', flag: 'jp' },
        'ko': { name: '한국어', flag: 'kr' }
      },
-     langCodes: ['zh', 'en', 'ja', 'ko'],
      sourceLang: 'zh',
      targetLang: 'en',
      transcript: '',
      outputText: '',
      isRecording: false,
-     sourceLanguages: [],
-     targetLanguages: [],
-     // Hugging Face Space API URL
-     hfSpaceUrl: 'https://dazaozi-wechat-translator-app.hf.space' // REPLACE WITH YOUR ACTUAL SPACE URL
    },

    onLoad: function () {
-     this.initializeLanguages();
      this.recorderManager = wx.getRecorderManager();
      this.initRecorderManager();
-   },
-
-   initializeLanguages: function () {
-     const { langCodes, languages, sourceLang, targetLang } = this.data;
-     const sourceLanguages = langCodes.map(code => ({
-       langCode: code,
-       name: languages[code].name,
-       flag: languages[code].flag,
-       selected: code === sourceLang
-     }));
-     const targetLanguages = langCodes.map(code => ({
-       langCode: code,
-       name: languages[code].name,
-       flag: languages[code].flag,
-       selected: code === targetLang
-     }));
-     this.setData({ sourceLanguages, targetLanguages });
-   },
-
-   selectSourceLanguage: function (e) {
-     const newSourceLang = e.currentTarget.dataset.langCode;
-     this.setData({ sourceLang: newSourceLang }, () => {
-       this.initializeLanguages();
-       if (this.data.transcript.trim() && this.data.transcript !== '正在聆听...' && this.data.transcript !== '未能识别到语音') {
-         this.translate(this.data.transcript);
-       }
-     });
-   },

-   selectTargetLanguage: function (e) {
-     const newTargetLang = e.currentTarget.dataset.langCode;
-     this.setData({ targetLang: newTargetLang }, () => {
-       this.initializeLanguages();
-       if (this.data.transcript.trim() && this.data.transcript !== '正在聆听...' && this.data.transcript !== '未能识别到语音') {
-         this.translate(this.data.transcript);
-       }
    });
  },

    swapLanguages: function () {
-     let { sourceLang, targetLang, transcript, outputText } = this.data;
-
-     const tempLang = sourceLang;
-     sourceLang = targetLang;
-     targetLang = tempLang;
-
-     const tempText = transcript;
-     transcript = outputText;
-     outputText = tempText;
-
-     this.setData({ sourceLang, targetLang, transcript, outputText }, () => {
-       this.initializeLanguages();
-       if (this.data.transcript.trim() && this.data.transcript !== '正在聆听...' && this.data.transcript !== '未能识别到语音') {
-         this.translate(this.data.transcript);
-       }
-     });
    },

    initRecorderManager: function () {
      this.recorderManager.onStart(() => {
-       console.log('recorder start');
        this.setData({ isRecording: true, transcript: '正在聆听...', outputText: '' });
      });

      this.recorderManager.onStop((res) => {
-       console.log('recorder stop', res);
        this.setData({ isRecording: false });
-       const { tempFilePath } = res;
-       if (tempFilePath) {
-         this.uploadAudioForASR(tempFilePath);
        } else {
-         console.error("onStop event triggered without a tempFilePath.");
-         this.setData({ transcript: '录音文件创建失败' });
        }
      });

      this.recorderManager.onError((res) => {
-       console.error('recorder error', res);
-       this.setData({ isRecording: false, transcript: '语音识别出错' });
      });
    },

-   startRecording: function () {
      wx.getSetting({
        success: (res) => {
          if (!res.authSetting['scope.record']) {
-           wx.authorize({
-             scope: 'scope.record',
-             success: () => {
-               this.recorderManager.start({ duration: 60000, sampleRate: 16000, numberOfChannels: 1, encodeBitRate: 96000, format: 'mp3' });
-             },
-             fail: () => {
-               wx.showToast({ title: '请授权麦克风权限', icon: 'none' });
-             }
-           });
          } else {
-           this.recorderManager.start({ duration: 60000, sampleRate: 16000, numberOfChannels: 1, encodeBitRate: 96000, format: 'mp3' });
          }
-       }
      });
    },

    stopRecording: function () {
-     if (this.recorderManager) {
-       this.recorderManager.stop();
-     }
    },

    uploadAudioForASR: function (filePath) {
-     console.log('Sending audio to Hugging Face ASR backend:', filePath);
      this.setData({ transcript: '正在识别...' });
-
-     const fileSystemManager = wx.getFileSystemManager();
-     fileSystemManager.readFile({
-       filePath: filePath,
-       // Read as ArrayBuffer
-       success: (res) => {
-         wx.request({
-           url: `${this.data.hfSpaceUrl}/api/predict`,
-           method: 'POST',
-           header: {
-             'Content-Type': 'application/octet-stream' // Send as binary stream
-           },
-           data: res.data, // Send ArrayBuffer directly
-           timeout: 60000, // 60 seconds timeout for ASR
-           success: (res) => {
-             try {
-               const data = JSON.parse(res.data);
-               // Gradio API returns result in data[0]
-               if (res.statusCode === 200 && data.data && data.data.length > 0) {
-                 const transcript = data.data[0];
-                 if (transcript) {
-                   this.setData({ transcript: transcript });
-                   this.translate(transcript);
-                 } else {
-                   this.setData({ transcript: '未能识别到语音' });
-                 }
-               } else {
-                 this.setData({ transcript: '语音识别失败' });
-                 console.error('ASR backend response error:', res);
-               }
-             } catch (e) {
-               console.error('Failed to parse ASR response JSON:', e, res.data);
-               this.setData({ transcript: '识别响应格式错误' });
-             }
-           },
-           fail: (err) => {
-             console.error('ASR request error:', err);
-             if (err.errMsg && err.errMsg.includes('timeout')) {
-               this.setData({ transcript: '识别超时,请重试' });
-             } else {
-               this.setData({ transcript: '语音识别出错' });
-             }
            }
-         });
-       },
-       fail: (err) => {
-         console.error('Failed to read audio file:', err);
-         this.setData({ transcript: '读取音频文件失败' });
-       }
-     });
    },

    translate: function (text) {
-     if (!text || !this.data.sourceLang || !this.data.targetLang) return;
-
-     const source = this.data.sourceLang;
-     const target = this.data.targetLang;
-
-     if (source === target) {
-       this.setData({ outputText: text });
-       return;
      }
-
      this.setData({ outputText: '正在翻译...' });
-
      wx.request({
-       url: `${this.data.hfSpaceUrl}/api/predict`,
        method: 'POST',
-       header: {
-         'content-type': 'application/json'
-       },
-       data: {
-         // Gradio API expects data in a specific format for predict
-         data: [text, source, target]
-       },
-       timeout: 30000, // 30 seconds timeout for translation
        success: (res) => {
-         try {
-           const data = res.data;
-           if (res.statusCode === 200 && data.data && data.data.length > 0) {
-             const translatedText = data.data[0];
-             this.setData({ outputText: translatedText });
-           } else {
-             this.setData({ outputText: '翻译失败' });
-             console.error('Translation backend response error:', res);
-           }
-         } catch (e) {
-           console.error('Failed to parse Translation response JSON:', e, res.data);
-           this.setData({ outputText: '翻译响应格式错误' });
          }
        },
-       fail: (err) => {
-         console.error('Translation request error:', err);
-         this.setData({ outputText: '翻译出错' });
-       }
      });
    }
  });
 
+ // FINAL VERSION: v29 - Based on the user's working original file
+
+ // Helper function to show detailed errors
+ function showDetailedError(title, content) {
+   wx.showModal({
+     title: title,
+     content: typeof content === 'object' ? JSON.stringify(content) : String(content),
+     showCancel: false
+   });
+ }

  Page({
    data: {
      languages: {
        'zh': { name: '中文', flag: 'cn' },
        'en': { name: 'English', flag: 'us' },
        'ja': { name: '日本語', flag: 'jp' },
        'ko': { name: '한국어', flag: 'kr' }
      },
      sourceLang: 'zh',
      targetLang: 'en',
      transcript: '',
      outputText: '',
      isRecording: false,
+     hfSpaceUrl: 'https://dazaozi-wechat-translator-app.hf.space',
    },

    onLoad: function () {
+     // Use the working pattern: attach recorderManager to `this`
      this.recorderManager = wx.getRecorderManager();
      this.initRecorderManager();

+     // Use the improved, simpler language list setup
+     this.setData({
+       sourceLanguages: Object.keys(this.data.languages).map(key => ({ ...this.data.languages[key], langCode: key })),
+       targetLanguages: Object.keys(this.data.languages).map(key => ({ ...this.data.languages[key], langCode: key }))
      });
    },

+   // --- Language Selection & UI (Simplified) ---
+   selectSourceLanguage: function (e) { this.setData({ sourceLang: e.currentTarget.dataset.langCode }); },
+   selectTargetLanguage: function (e) { this.setData({ targetLang: e.currentTarget.dataset.langCode }); },
    swapLanguages: function () {
+     this.setData({ sourceLang: this.data.targetLang, targetLang: this.data.sourceLang, transcript: this.data.outputText, outputText: this.data.transcript });
    },

+   // --- Unified Native Recorder Initialization (Correct Pattern) ---
    initRecorderManager: function () {
      this.recorderManager.onStart(() => {
+       // Correct pattern: set UI state *inside* the onStart callback
        this.setData({ isRecording: true, transcript: '正在聆听...', outputText: '' });
      });

      this.recorderManager.onStop((res) => {
        this.setData({ isRecording: false });
+       if (res.tempFilePath) {
+         this.uploadAudioForASR(res.tempFilePath);
        } else {
+         // This case might happen if the recording is too short
+         this.setData({ transcript: '录音时间太短或无效' });
        }
      });

      this.recorderManager.onError((res) => {
+       this.setData({ isRecording: false });
+       showDetailedError('录音发生错误', res);
      });
    },

+   // --- Main Record Button Handler (with Permissions) ---
+   handleRecordToggle: function () {
+     if (this.data.isRecording) {
+       this.stopRecording();
+       return;
+     }
      wx.getSetting({
        success: (res) => {
          if (!res.authSetting['scope.record']) {
+           // Wrap in an arrow function so `this` stays bound to the Page
+           wx.authorize({ scope: 'scope.record', success: () => this.startRecording(), fail: (err) => showDetailedError('授权失败', err) });
          } else {
+           this.startRecording();
          }
+       },
+       fail: (err) => showDetailedError('无法获取权限设置', err)
      });
    },

+   // --- Unified Start/Stop Recording ---
+   startRecording: function () {
+     const options = {
+       duration: 60000,       // Max recording duration: 60 s
+       sampleRate: 16000,     // For ASR, 16 kHz is the standard
+       numberOfChannels: 1,   // Mono audio is sufficient
+       encodeBitRate: 48000,  // 48 kbps is a good balance for speech
+       format: 'mp3'          // Use MP3 format
+     };
+     this.recorderManager.start(options);
+   },
+
  stopRecording: function () {
101
+ this.recorderManager.stop();
 
 
102
  },
103
 
104
+ // --- Unified Backend ASR & Translation Flow ---
105
  uploadAudioForASR: function (filePath) {
 
106
  this.setData({ transcript: '正在识别...' });
107
+ wx.getFileSystemManager().readFile({ filePath, encoding: 'base64', success: (res) => {
108
+ wx.request({
109
+ url: `${this.data.hfSpaceUrl}/api/asr`,
110
+ method: 'POST',
111
+ data: { "audio_data_uri": `data:audio/mp3;base64,${res.data}` },
112
+ timeout: 60000,
113
+ success: (asrRes) => {
114
+ if (asrRes.statusCode === 200 && asrRes.data.transcript) {
115
+ const transcript = asrRes.data.transcript;
116
+ this.setData({ transcript });
117
+ this.translate(transcript);
118
+ } else {
119
+ showDetailedError('语音识别失败', asrRes.data);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
+ },
122
+ fail: (err) => showDetailedError('识别请求失败', err)
123
+ });
124
+ }});
 
 
 
125
  },
126
 
127
  translate: function (text) {
128
+ if (!text) return;
129
+ const { sourceLang, targetLang } = this.data;
130
+ if (sourceLang === targetLang) {
131
+ return this.setData({ outputText: text });
 
 
 
 
132
  }
 
133
  this.setData({ outputText: '正在翻译...' });
 
134
  wx.request({
135
+ url: `${this.data.hfSpaceUrl}/api/translate`,
136
  method: 'POST',
137
+ data: { "text": text, "source_lang": sourceLang, "target_lang": targetLang },
138
+ timeout: 45000,
 
 
 
 
 
 
139
  success: (res) => {
140
+ if (res.statusCode === 200 && res.data.translated_text) {
141
+ this.setData({ outputText: res.data.translated_text });
142
+ } else {
143
+ showDetailedError('翻译失败', res.data);
 
 
 
 
 
 
 
 
144
  }
145
  },
146
+ fail: (err) => showDetailedError('翻译请求失败', err)
 
 
 
147
  });
148
  }
149
  });
requirements.txt CHANGED
@@ -2,3 +2,10 @@ transformers
  torch
  sentencepiece
  gradio
+ soundfile
+ sacremoses
+ fastapi
+ uvicorn
+ deepl
+ python-dotenv
+ optimum[onnxruntime]