# Gradio demo: Kinyarwanda speech recognition with a pretrained SpeechBrain ASR model.
import warnings

import gradio as gr

warnings.filterwarnings("ignore")

from speechbrain.pretrained import EncoderDecoderASR
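
# Load the pretrained Kinyarwanda ASR model (wav2vec 2.0 fine-tuned on Common Voice "rw").
# The checkpoint is downloaded into `savedir` on the first run and reused afterwards.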
asr_model = EncoderDecoderASR.from_hparams(
    source="speechbrain/asr-wav2vec2-commonvoice-rw",
    savedir="pretrained_models/asr-wav2vec2-commonvoice-rw",
)

# Optional sanity check (disabled to speed up startup): transcribe the bundled example clip.
# asr_model.transcribe_file("speechbrain/asr-wav2vec2-commonvoice-rw/example.mp3")

def asr_transcript(audio, audio_microphone, model_params):
    """Transcribe an uploaded or microphone-recorded clip with the SpeechBrain model."""
    # model_params comes from the model-selection dropdown; only the SpeechBrain
    # backend is wired up for now, so the value is not used yet.

    # Prefer the microphone recording when both inputs are provided.
    audio = audio_microphone if audio_microphone else audio

    if audio is None and audio_microphone is None:
        return ("Please provide audio by uploading a file or by recording from the "
                "microphone (press Record and allow microphone access).")

    if audio:
        # Gradio passes the clip as a temporary file; transcribe it from its path.
        return asr_model.transcribe_file(audio.name)
    else:
        return "File not valid"
# Build the Gradio UI: file upload, microphone recording, and a model selector.
gradio_ui = gr.Interface(
    fn=asr_transcript,
    title="Kinyarwanda Speech Recognition",
    description="Upload an audio clip or record from your browser's microphone, and let AI do the hard work of transcribing it.",
    article="""
    This demo showcases the pretrained speechbrain/asr-wav2vec2-commonvoice-rw model (wav2vec 2.0 fine-tuned on Common Voice Kinyarwanda).
    """,
    inputs=[
        gr.inputs.Audio(label="Upload Audio File", type="file", optional=True),
        gr.inputs.Audio(source="microphone", type="file", optional=True, label="Record from microphone"),
        gr.inputs.Dropdown(
            choices=["speechbrain (wav2vec2)", "coqui (soon)"],
            type="value",
            default="speechbrain (wav2vec2)",
            label="Select speech recognition model",
            optional=False,
        ),
    ],
    outputs=[gr.outputs.Textbox(label="Recognized speech")],
)
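
# share=True serves the app locally and also creates a temporary public Gradio link.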
gradio_ui.launch(share=True)