File size: 2,140 Bytes
31a0ae0
 
 
 
 
1feded7
2567541
31a0ae0
 
bcc4745
276c782
31a0ae0
 
 
1420613
3bbe570
31a0ae0
 
 
 
 
 
1feded7
 
 
 
bcc4745
31a0ae0
 
bcc4745
 
31a0ae0
1feded7
 
276c782
31a0ae0
 
1feded7
bcc4745
31a0ae0
276c782
bcc4745
1feded7
bcc4745
 
1feded7
bcc4745
1feded7
 
 
 
 
bcc4745
 
1feded7
bcc4745
 
1feded7
 
 
 
 
 
 
 
31a0ae0
bb26554
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import tempfile

import gradio as gr
import pytube as pt
import torch
from transformers import pipeline

# Checkpoint identifier passed to the speech-recognition pipeline below.
MODEL_NAME = "ales/whisper-small-belarusian"
# Language code handed to the tokenizer when building the decoder prompt.
lang = "be"

# Use CUDA device index 0 when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"
print(f"Device set to use {device}")

# Shared ASR pipeline used by every request handler in this module.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=8,
    stride_length_s=1,
)

# Store the decoder prompt ids for `lang` and the "transcribe" task on the
# model config.
_decoder_prompt_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
pipe.model.config.forced_decoder_ids = _decoder_prompt_ids


def transcribe(audio_file):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            no audio was supplied.

    Returns:
        The ``"text"`` entry of the pipeline result, or an ``"ERROR: ..."``
        message when ``audio_file`` is ``None``.
    """
    if audio_file is not None:
        result = pipe(audio_file)
        return result["text"]
    return "ERROR: Please upload or record audio"


def yt_transcribe(yt_url):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video, as entered by the user.

    Returns:
        A ``(embed_html, text)`` tuple. ``embed_html`` is an HTML snippet
        embedding the video player and ``text`` is the transcription. On
        failure ``embed_html`` is ``""`` and ``text`` is an ``"ERROR: ..."``
        message.
    """
    # Treat whitespace-only input the same as an empty textbox.
    if not yt_url or not yt_url.strip():
        return "", "ERROR: You must provide a YouTube URL."
    yt_url = yt_url.strip()

    try:
        yt = pt.YouTube(yt_url)
        stream = yt.streams.filter(only_audio=True).first()
    except pt.exceptions.PytubeError as err:
        # Report invalid or unavailable videos the same way as the
        # empty-input case instead of letting the exception escape.
        return "", f"ERROR: Could not load the YouTube video: {err}"
    if stream is None:
        return "", "ERROR: No audio stream is available for this video."

    # Use the id pytube extracted from the URL rather than splitting on
    # "?v=": that split breaks on short links and on URLs where `v` is not
    # the first query parameter, and it would place raw user text in the HTML.
    embed = f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{yt.video_id}" frameborder="0" allowfullscreen></iframe></center>'

    # Download into a per-call temporary directory so concurrent requests do
    # not overwrite each other's file and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Transcribe from the path pytube reports, not a hard-coded name.
        audio_path = stream.download(output_path=tmp_dir, filename="audio.mp3")
        text = pipe(audio_path)["text"]
    return embed, text


# Two-tab Gradio interface: one tab transcribes recorded or uploaded audio,
# the other transcribes the audio of a YouTube video.
with gr.Blocks() as demo:
    # Tab 1: the audio component passes a file path to `transcribe`.
    with gr.Tab("🎤 Transcribe Audio"):
        gr.Markdown("## Запішы або загрузі аўдыё")
        audio_in = gr.Audio(label="Record or Upload Audio", type="filepath")
        run_audio = gr.Button("Transcribe")
        audio_result = gr.Textbox(label="Transcription")

        run_audio.click(transcribe, inputs=audio_in, outputs=audio_result)

    # Tab 2: `yt_transcribe` fills both the HTML player and the text box.
    with gr.Tab("📺 Transcribe YouTube"):
        gr.Markdown("## Устаў спасылку на YouTube-відэа")
        url_box = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...", label="YouTube URL")
        run_youtube = gr.Button("Transcribe YouTube")
        player_html = gr.HTML()
        youtube_result = gr.Textbox(label="Transcription")

        run_youtube.click(
            yt_transcribe,
            inputs=url_box,
            outputs=[player_html, youtube_result],
        )

# Start the app as soon as the module is executed.
demo.launch()