Chrysoula's picture
Update app.py
ee53ffb
raw
history blame
1.55 kB
import tempfile

import gradio as gr
import pytube as pt
from transformers import pipeline
# Identifier of the speech-recognition model handed to `pipeline`.
ASR_MODEL_ID = "Hoft/whisper-small-swedish-asr"  # change to "your-username/the-name-you-picked"

# Built once at import time and shared by the transcription functions below.
pipe = pipeline(model=ASR_MODEL_ID)
def microphone_or_file_transcribe(audio):
    """Transcribe a recorded or uploaded audio clip.

    Args:
        audio: Path to the audio file to transcribe. May be ``None`` or
            empty when the form was submitted without any audio.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    # Nothing to transcribe: return early rather than handing an empty
    # value to the model pipeline, which would fail on it.
    if not audio:
        return ""
    return pipe(audio)["text"]
def youtube_transcribe(url):
    """Transcribe the audio track of a YouTube video.

    The first audio-only stream of the video is downloaded into a
    temporary directory that is private to this call, transcribed, and
    then removed together with the directory.

    Args:
        url: URL of the YouTube video. May be ``None`` or blank.

    Returns:
        The transcribed text, or an empty string when no URL was given.

    Errors raised by pytube (invalid URL, unavailable video, no
    audio-only stream) propagate to the caller unchanged.
    """
    if not url or not url.strip():
        return ""

    yt = pt.YouTube(url.strip())
    stream = yt.streams.filter(only_audio=True)[0]

    # A per-call temporary directory keeps overlapping requests from
    # overwriting each other's download and leaves no file behind.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir, filename="audio.mp3")
        # Transcribe while the directory still exists.
        return pipe(audio_path)["text"]
# Title and description shown on every tab.
APP_TITLE = "Whisper Small Swedish"
APP_DESCRIPTION = (
    "Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model."
)

app = gr.Blocks()

# Input components are taken from the top-level `gr` namespace on every tab
# (matching the microphone tab) instead of the legacy `gr.inputs` module.

# Tab 1: record from the microphone; the clip reaches the handler as a file path.
microphone_tab = gr.Interface(
    fn=microphone_or_file_transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Tab 2: paste a YouTube URL.
youtube_tab = gr.Interface(
    fn=youtube_transcribe,
    inputs=[
        gr.Textbox(
            lines=1,
            placeholder="Paste the URL to a YouTube video",
            label="URL",
        )
    ],
    outputs="text",
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Tab 3: upload an audio file; handled exactly like a microphone recording.
file_tab = gr.Interface(
    fn=microphone_or_file_transcribe,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Combine the three interfaces into one tabbed page inside the Blocks app.
with app:
    gr.TabbedInterface(
        [microphone_tab, youtube_tab, file_tab],
        ["Microphone", "YouTube", "File"],
    )

app.launch(enable_queue=True)