Spaces:
Runtime error
Runtime error
| '''Librispeech 100h English ASR demo | |
| @ML2 --> @HuggingFace | |
| 2022-02-11 | |
| 2022-02-16 | |
| - changed to HF | |
| - server setting commented | |
| - model cache dir commented | |
| ''' | |
| import os | |
| from glob import glob | |
| from loguru import logger | |
| import soundfile as sf | |
| import librosa | |
| # from scipy.io import wavfile | |
| import gradio as gr | |
| from espnet_model_zoo.downloader import ModelDownloader | |
| from espnet2.bin.asr_inference import Speech2Text | |
| # ---------- Settings ---------- | |
| GPU_ID = '-1' | |
| os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID | |
| DEVICE = 'cuda' if GPU_ID != '-1' else 'cpu' | |
| SERVER_PORT = 42208 | |
| SERVER_NAME = "0.0.0.0" | |
| SSL_DIR = './keyble_ssl' | |
| # MODEL_DIR = '/home/jkang/HDD4T/jkang/huggingface' | |
| MODEL_DIR = './model' | |
| EXAMPLE_DIR = './examples' | |
| examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav'))) | |
| # ---------- Logging ---------- | |
| logger.add('app.log', mode='a') | |
| logger.info('============================= App restarted =============================') | |
| # ---------- Model ---------- | |
| logger.info('download model') | |
| d = ModelDownloader(MODEL_DIR) | |
| out = d.download_and_unpack("jkang/espnet2_librispeech_100_conformer") | |
| logger.info('model downloaded') | |
| model = Speech2Text.from_pretrained( | |
| asr_train_config=out['asr_train_config'], | |
| asr_model_file=out['asr_model_file'] | |
| ) | |
| logger.info('model loaded') | |
| def predict(wav_file): | |
| logger.info('wav file loaded') | |
| # speech, rate = sf.read(wav_file) | |
| speech, rate = librosa.load(wav_file, sr=16000) | |
| # rate, speech = wavfile.read(wav_file) | |
| nbests = model(speech) | |
| text, *_ = nbests[0] | |
| logger.info('predicted') | |
| return text | |
| iface = gr.Interface( | |
| predict, | |
| title='ESPNet2 ASR Librispeech Conformer (trained on clean-100h)', | |
| description='Upload your wav file to test the model', | |
| inputs=[ | |
| gr.inputs.Audio(label='wav file', source='microphone', type='filepath') | |
| ], | |
| outputs=[ | |
| gr.outputs.Textbox(label='decoding result'), | |
| ], | |
| examples=examples, | |
| article='<p style="text-align:center">Model URL<a target="_blank" href="https://huggingface.co/jkang/espnet2_librispeech_100_conformer">🤗</a></p>', | |
| ) | |
| if __name__ == '__main__': | |
| try: | |
| iface.launch(debug=True, | |
| # server_name=SERVER_NAME, | |
| # server_port=SERVER_PORT, | |
| enable_queue=True, | |
| # ssl_keyfile=SSL_DIR, | |
| # ssl_certfile=SSL_DIR | |
| ) | |
| except KeyboardInterrupt as e: | |
| print(e) | |
| finally: | |
| iface.close() |