import torch
import gradio as gr
from transformers import pipeline

model_id = "Sandiago21/whisper-large-v2-german-2"  # update with your model id
pipe = pipeline("automatic-speech-recognition", model=model_id)

title = "Automatic Speech Recognition (ASR)"
description = """
Demo for automatic speech recognition in German. The demo uses the
[Sandiago21/whisper-large-v2-german-2](https://huggingface.co/Sandiago21/whisper-large-v2-german-2)
checkpoint, which is based on OpenAI's
[Whisper](https://huggingface.co/openai/whisper-large-v2) model and was
fine-tuned on a German audio dataset.

![Automatic Speech Recognition (ASR)](https://datasets-server.huggingface.co/assets/huggingface-course/audio-course-images/--/huggingface-course--audio-course-images/train/2/image/image.png "Diagram of Automatic Speech Recognition (ASR)")
"""


def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "german",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


demo = gr.Blocks()

mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Textbox(),
    title=title,
    description=description,
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Textbox(),
    examples=[["./example.wav"]],
    title=title,
    description=description,
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch()
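
# Optional extras (a minimal sketch, not part of the demo above):
#
# 1. Sanity-check the pipeline before launching the UI by calling the
#    transcription function directly on the bundled example file
#    (assumes ./example.wav from the `examples` list exists locally):
#
#        print(transcribe_speech("./example.wav"))
#
# 2. To expose the demo via a temporary public URL (useful in a Colab
#    notebook, for instance), Gradio supports share links:
#
#        demo.launch(share=True)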