|
import gradio as gr |
|
import vowel_length |
|
import ctcalign |
|
|
|
|
|
# Input files handed to vowel_length.setup() below.
meta_tsv = [
    "data/set1.tsv",
    "data/set2.tsv",
]
ph_key = "data/key_all.tsv"
# NOTE(review): presumably a pickle of precomputed alignments -- confirm
# against vowel_length.setup.
align_output = "data/align_csv.pickle"

# Runs once at import time. `kws` supplies the "Word" dropdown choices in the
# UI; `vck`, `dat` and `csvs` are forwarded to vowel_length.runan by manager().
dat, vck, kws, csvs = vowel_length.setup(meta_tsv, ph_key, align_output)
|
|
|
|
|
|
|
|
|
|
|
def manager(word, group, aligner, side):
    """Produce the figure for one word / speaker group / aligner selection.

    Delegates to ``vowel_length.runan`` together with the module-level
    ``vck``, ``dat`` and ``csvs`` objects created at import time.

    Args:
        word: Value of the "Word" dropdown.
        group: Value of the "Speaker group" dropdown.
        aligner: Value of the "Aligner" dropdown.
        side: Value of the fourth wired input (the update button in this
            file's wiring). It is only echoed to stdout.

    Returns:
        Whatever ``vowel_length.runan`` returns; the UI sends it to a
        ``gr.Plot`` output.
    """
    plot = vowel_length.runan(word, group, aligner, vck, dat, csvs)
    # Echo which button triggered the update (printed only after the figure
    # has been built successfully).
    print(side)
    return plot
|
|
|
def aligning(transcript, audio, language):
    """Run forced alignment for one recording and return the result.

    Args:
        transcript: Text from the "Transcript" textbox.
        audio: Audio input value; the UI's ``gr.Audio`` component is
            configured with ``type="filepath"``.
        language: Selected entry of the "Language" radio group.

    Returns:
        The output of ``ctcalign.langsalign``, unchanged; the UI displays it
        in the "Forced alignment output" textbox.
    """
    # Note the argument order expected by ctcalign: audio first, then text.
    return ctcalign.langsalign(audio, transcript, language)
|
|
|
|
|
# Two-tab Gradio interface. `bl` stays a module-level name so that the
# entry-point guard at the bottom of the file can launch it.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# without indentation; each plot is assumed to sit inside its own column --
# confirm against the deployed layout.
with gr.Blocks() as bl:

    with gr.Tabs():

        # ---- Tab 1: forced alignment of an uploaded recording -------------
        with gr.TabItem("CTC alignment"):
            gr.Markdown(
                """
                # Forced alignment with CTC decoding
                Choose a language to upload a sentence with corresponding text. Generate word and letter time-alignments from the language's wav2vec-2.0 model, with output in MFA (Montreal Forced Aligner)-compatible format. It is best to upload short recordings of a sentence or so; recordings over a couple minutes require excessive memory to align, and should be divided into shorter pieces. Use only lower case letters with no punctuation.
                """
            )

            gr.Markdown(
                """
                Contact [email protected] with feedback, problems, and to request changes.
                """
            )

            with gr.Row():
                # Left column: inputs and the trigger button.
                with gr.Column():
                    transcript_boxx = gr.Textbox(
                        label="Transcript",
                        placeholder="Type or paste the transcript here. Capitalisation and punctuation, if any, will be ignored.",
                    )

                    audio_file = gr.Audio(type="filepath")

                    alangmenu = gr.Radio(
                        choices=["Icelandic", "Faroese", "Norwegian"],
                        value="Icelandic",
                        label="Language",
                    )

                    al_btn = gr.Button(value="Run forced alignment")

                # Right column: alignment result as text.
                with gr.Column():
                    output_box = gr.Textbox(label="Forced alignment output")

            al_btn.click(
                fn=aligning,
                inputs=[transcript_boxx, audio_file, alangmenu],
                outputs=output_box,
            )

        # ---- Tab 2: two side-by-side vowel/consonant duration plots --------
        with gr.TabItem("Vowel quantity"):
            gr.Markdown(
                """
                # Long and short Icelandic vowels
                Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0. If the graph shows "Error" this means there is not data for the selected word, speaker group, and alignment type.

                The general expectation is that syllables with long stressed vowels followed by short consonants have a higher vowel:consonant duration ratio, while syllables with short stressed vowels followed by long consonants have a lower vowel:consonant ratio. However, a great many other factors affect the relative duration in any one recorded token. See Pind 1999, 'Speech segment durations and quantity in Icelandic' (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.

                All phoneme durations are measured automatically with no human correction. The purpose of this demo is to evaluate the role of such tools in large-scale phonetic research. Therefore, no measurements shown in this demo should be taken as conclusive without some independent verification.
                """
            )

            with gr.Row():
                # Plot 1 controls and output.
                with gr.Column():
                    wmenu1 = gr.Dropdown(choices=kws, label="Word", value="hala")
                    lmenu1 = gr.Dropdown(
                        choices=["L1", "L2", "All"],
                        label="Speaker group",
                        value="L1",
                    )
                    amenu1 = gr.Dropdown(
                        choices=["MFA", "CTC"],
                        label="Aligner",
                        value="CTC",
                    )

                    btn1 = gr.Button(value="Update Plot 1")

                    pl1 = gr.Plot()
                    # The button itself is the fourth input; manager()
                    # receives its value as `side`.
                    btn1.click(
                        fn=manager,
                        inputs=[wmenu1, lmenu1, amenu1, btn1],
                        outputs=pl1,
                    )

                # Plot 2 controls and output (same wiring, different default word).
                with gr.Column():
                    wmenu2 = gr.Dropdown(choices=kws, label="Word", value="halla")
                    lmenu2 = gr.Dropdown(
                        choices=["L1", "L2", "All"],
                        label="Speaker group",
                        value="L1",
                    )
                    amenu2 = gr.Dropdown(
                        choices=["MFA", "CTC"],
                        label="Aligner",
                        value="CTC",
                    )

                    btn2 = gr.Button(value="Update Plot 2")
                    pl2 = gr.Plot()
                    btn2.click(
                        fn=manager,
                        inputs=[wmenu2, lmenu2, amenu2, btn2],
                        outputs=pl2,
                    )
|
|
|
|
|
|
|
|
|
|
|
# Start the Gradio app only when this file is executed as a script; importing
# the module builds `bl` but does not launch it.
if __name__ == "__main__":
    bl.launch()
|
|
|
|
|
|