# Scraped page header (not part of the program): Spaces - Running
"""Gradio front end for the MMS zero-shot ASR demo.

Builds a two-column page: the left column collects the audio, the text data,
an optional language model and the beam-search settings; the right column
shows the transcript and logs returned by ``zeroshot.process``.
"""

import gradio as gr
from zeroshot import (
    process,
    WORD_SCORE_DEFAULT_IF_LM,
    WORD_SCORE_DEFAULT_IF_NOLM,
    LM_SCORE_DEFAULT,
)

# Reference transcript shared by all English examples (they use the same audio
# clip). The leading space is kept exactly as it appeared in each example row.
ENGLISH_REFERENCE = " This is going to look at the code that we have in our configuration that we've already exported and compare it to our database, and we want to import"

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(
        "<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo. See our arXiv <a href='http://arxiv.org/abs/2407.17852'>paper</a> for model details.</p>"
    )
    gr.HTML(
        """<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language and an optional n-gram language model (even a simple 1-gram model will work!) to help with accuracy.<br>We recommend having a minimum of 10000 sentences in the textfile to achieve a good performance.</center>"""
    )
    with gr.Row():
        # Left column: inputs and decoding settings.
        with gr.Column():
            audio = gr.Audio(label="Audio Input\n(use microphone or upload a file)")
            with gr.Row():
                words_file = gr.File(label="Text Data")
                lm_file = gr.File(label="Language Model\n(optional)")
            with gr.Accordion("Advanced Settings", open=False):
                gr.Markdown(
                    "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                )
                with gr.Row():
                    with gr.Column():
                        wscore_usedefault = gr.Checkbox(
                            label="Use Default Word Insertion Score", value=True
                        )
                        # Locked until the checkbox above is unticked
                        # (see update_slider).
                        wscore = gr.Slider(
                            minimum=-10.0,
                            maximum=10.0,
                            value=WORD_SCORE_DEFAULT_IF_LM,
                            step=0.1,
                            interactive=False,
                            label="Word Insertion Score",
                        )
                    with gr.Column():
                        lmscore_usedefault = gr.Checkbox(
                            label="Use Default Language Model Score", value=True
                        )
                        # Locked until the checkbox above is unticked
                        # (see update_slider).
                        lmscore = gr.Slider(
                            minimum=-10.0,
                            maximum=10.0,
                            value=LM_SCORE_DEFAULT,
                            step=0.1,
                            interactive=False,
                            label="Language Model Score",
                        )
                    with gr.Column():
                        autolm = gr.Checkbox(
                            label="Automatically create Unigram LM from text data",
                            value=True,
                        )
            btn = gr.Button("Submit", elem_id="submit")

            def update_slider(ws, ls, lm, alm):
                """Rebuild both score sliders after a decoding option changes.

                Args:
                    ws: State of the "Use Default Word Insertion Score" checkbox.
                    ls: State of the "Use Default Language Model Score" checkbox.
                    lm: Value of the optional language-model file input
                        (``None`` when nothing is uploaded).
                    alm: State of the "Automatically create Unigram LM from
                        text data" checkbox.

                Returns:
                    A ``(word_insertion_slider, language_model_slider)`` pair
                    used to update ``wscore`` and ``lmscore``. Each slider is
                    reset to its default value and is editable only when its
                    "use default" checkbox is unticked.
                """
                # A language model is in play when one was uploaded or when a
                # unigram LM is generated automatically from the text data.
                has_lm = lm is not None or alm

                ws_slider = gr.Slider(
                    minimum=-10.0,
                    maximum=10.0,
                    # Word-score defaults belong on the word-insertion slider;
                    # they were previously applied to the LM slider.
                    value=WORD_SCORE_DEFAULT_IF_LM
                    if has_lm
                    else WORD_SCORE_DEFAULT_IF_NOLM,
                    step=0.1,
                    interactive=not ws,
                    label="Word Insertion Score",
                )
                ls_slider = gr.Slider(
                    minimum=-10.0,
                    maximum=10.0,
                    # Without any language model the LM score falls back to 0.
                    value=LM_SCORE_DEFAULT if has_lm else 0,
                    step=0.1,
                    interactive=not ls,
                    label="Language Model Score",
                )
                return ws_slider, ls_slider

            # Refresh the sliders whenever one of the controls that
            # update_slider reads changes. Without this registration the
            # sliders stay locked at interactive=False forever.
            slider_inputs = [wscore_usedefault, lmscore_usedefault, lm_file, autolm]
            for trigger in slider_inputs:
                trigger.change(
                    fn=update_slider,
                    inputs=slider_inputs,
                    outputs=[wscore, lmscore],
                )

        # Right column: results.
        with gr.Column():
            text = gr.Textbox(label="Transcript")
            with gr.Accordion("Logs", open=False):
                logs = gr.Textbox(show_label=False)

            # HACK: hidden textbox that lets the example rows below carry a
            # reference transcript through to `process`.
            reference = gr.Textbox(label="Reference Transcript", visible=False)

    btn.click(
        process,
        inputs=[
            audio,
            words_file,
            lm_file,
            wscore,
            lmscore,
            wscore_usedefault,
            lmscore_usedefault,
            autolm,
            reference,
        ],
        outputs=[text, logs],
    )

    # Examples
    gr.Examples(
        examples=[
            # ["upload/english/english.mp3", "upload/english/c4_25k_sentences.txt"],
            [
                "upload/english/english.mp3",
                "upload/english/c4_10k_sentences.txt",
                ENGLISH_REFERENCE,
            ],
            [
                "upload/english/english.mp3",
                "upload/english/c4_5k_sentences.txt",
                ENGLISH_REFERENCE,
            ],
            [
                "upload/english/english.mp3",
                "upload/english/gutenberg_27045.txt",
                ENGLISH_REFERENCE,
            ],
        ],
        inputs=[audio, words_file, reference],
        label="English",
    )
    gr.Examples(
        examples=[
            [
                "upload/ligurian/ligurian_1.mp3",
                "upload/ligurian/zenamt_10k_sentences.txt",
                "I mæ colleghi m’an domandou d’aggiuttâli à fâ unna preuva co-o zeneise pe vedde s’o fonçioña.",
            ],
            [
                "upload/ligurian/ligurian_2.mp3",
                "upload/ligurian/zenamt_10k_sentences.txt",
                "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
            ],
            [
                "upload/ligurian/ligurian_3.mp3",
                "upload/ligurian/zenamt_5k_sentences.txt",
                "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l’euio, o formaggio, l’aggio e a sâ.",
            ],
        ],
        inputs=[audio, words_file, reference],
        label="Ligurian",
    )

demo.launch()