|
import os, pickle |
|
import numpy as np |
|
from collections import defaultdict |
|
import matplotlib |
|
matplotlib.use('Agg') |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
def getwps(start, end, phones):
    """Select the phone segments that lie entirely inside a time window.

    Args:
        start: Lower bound of the window (inclusive).
        end: Upper bound of the window (inclusive).
        phones: Iterable of ``(label, seg_start, seg_end)`` triples.

    Returns:
        A list of the ``(label, seg_start, seg_end)`` triples, in input
        order, whose start is >= ``start`` and whose end is <= ``end``.
    """
    inside = []
    for label, seg_start, seg_end in phones:
        if seg_start >= start and seg_end <= end:
            inside.append((label, seg_start, seg_end))
    return inside
|
|
|
|
|
def read_mfa(afile):
    """Parse alignment CSV text into word records with their phones.

    Every line of ``afile`` is split on commas and must yield exactly five
    fields, read positionally as ``start, end, label, tier, <ignored>``.
    Rows whose tier is ``'words'`` or ``'phones'`` are used; rows with any
    other tier value are ignored.

    Args:
        afile: The CSV content as a single string (not a path).

    Returns:
        A list of ``(word, start, end, phones)`` tuples, one per ``'words'``
        row in file order, where ``start``/``end`` are floats and ``phones``
        is the list of ``(phone, start, end)`` triples that ``getwps``
        finds inside the word's time span.
    """
    rows = [line.split(',') for line in afile.splitlines()]

    def _tier(name):
        # One full pass per tier, converting the time fields to float.
        return [
            (label, float(begin), float(finish))
            for begin, finish, label, tier, _ in rows
            if tier == name
        ]

    word_segs = _tier('words')
    phone_segs = _tier('phones')

    return [
        (word, w_start, w_end, getwps(w_start, w_end, phone_segs))
        for word, w_start, w_end in word_segs
    ]
|
|
|
|
|
|
|
def read_ph_key(fpath):
    """Load the vowel/consonant phone-index key from a tab-separated file.

    The first line is treated as a header and skipped; blank lines are
    ignored. Every remaining line is read positionally:

    * column 0 - keyword (outer lookup key),
    * column 1 - aligner name, must be ``'ctc'`` or ``'mfa'``,
    * column 2 - spelling / phone string (inner lookup key),
    * column 3 - comma-separated phone indices for the vowel,
    * column 4 - comma-separated phone indices for the consonant.

    Index fields hold integers, or the literal ``'X'`` which is kept as a
    string.

    Args:
        fpath: Path to the key file. It is decoded as UTF-8 so that
            non-ASCII keywords do not depend on the platform locale
            (assumes the file is stored as UTF-8 - confirm for your data).

    Returns:
        Plain nested dicts of the form
        ``{aligner: {keyword: {spelling: (vowel_idx, consonant_idx)}}}``
        where both index entries are tuples. The ``'ctc'`` and ``'mfa'``
        keys are always present, even when empty.

    Raises:
        KeyError: If column 1 is neither ``'ctc'`` nor ``'mfa'``.
        IndexError: If a non-blank line has fewer than five columns.
        ValueError: If an index is neither an integer nor ``'X'``.
    """
    def _parse_indices(field):
        # 'X' is a marker, not an index, so it is passed through unchanged.
        return tuple(tok if tok == 'X' else int(tok) for tok in field.split(','))

    with open(fpath, 'r', encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    key = {'ctc': {}, 'mfa': {}}
    for line in lines[1:]:
        if not line.strip():
            # A stray empty line would otherwise fail the column lookups.
            continue
        cols = line.split('\t')
        info = (_parse_indices(cols[3]), _parse_indices(cols[4]))
        key[cols[1]].setdefault(cols[0], {})[cols[2]] = info

    return key
|
|
|
|
|
def get_vc_dur(kwd, atype, dat, vcd, csvdict):
    """Collect (vowel, consonant) durations for each aligned token of a word.

    Args:
        kwd: The keyword to look for.
        atype: Aligner name. ``'ctc'`` looks the key up by ``kwd`` itself;
            ``'mfa'`` looks it up by the token's phone labels joined with
            single spaces.
        dat: Metadata rows. A row is used when ``kwd`` is contained in its
            last element; element 3 is a file name whose part before the
            first ``'.'`` selects the alignment CSV.
        vcd: Mapping from spelling to ``(vowel_indices, consonant_indices)``,
            indices referring to positions in the token's phone list.
        csvdict: Mapping from ``'<atype>_csv/<stem>.csv'`` to CSV text that
            ``read_mfa`` can parse. Rows without an entry are skipped.

    Returns:
        A list of ``(vowel_duration, consonant_duration)`` tuples, one per
        aligned token whose word label equals ``kwd``. Each duration runs
        from the start of the first indexed phone to the end of the last.
    """
    def _span(phones, indices):
        # Start of the first indexed phone up to the end of the last one.
        begin = phones[indices[0]][1]
        finish = phones[indices[-1]][2]
        return finish - begin

    matching_rows = [row for row in dat if kwd in row[-1]]
    durations = []

    if atype == 'ctc':
        pspel = kwd

    for row in matching_rows:
        stem = row[3].split(".")[0]
        akey = f'{atype}_csv/{stem}.csv'
        if akey not in csvdict:
            continue

        for token in read_mfa(csvdict[akey]):
            if token[0] != kwd:
                continue
            phones = token[3]
            if atype == 'mfa':
                pspel = ' '.join([label for label, _, _ in phones])

            vdur = _span(phones, vcd[pspel][0])
            cdur = _span(phones, vcd[pspel][1])
            durations.append((vdur, cdur))

    return durations
|
|
|
|
|
|
|
|
|
|
|
def displ(prinfo, kwd):
    """Scatter-plot vowel against consonant durations for one keyword.

    Args:
        prinfo: Sequence of ``(vowel_duration, consonant_duration)`` pairs.
            It is iterated several times and must be non-empty. Values are
            multiplied by 1000 for the millisecond axes.
        kwd: Keyword shown (upper-cased) in the plot title.

    Returns:
        ``(ratio, figure)`` where ``ratio`` is the mean of the per-pair
        vowel/consonant quotients and ``figure`` is the new pyplot figure.
    """
    def _axis_top(values_ms):
        # Largest value capped at 1000, but the axis never ends below 500.
        return max(500, min(max(values_ms), 1000))

    ratios = [vowel / cons for vowel, cons in prinfo]
    rto = np.mean(ratios)
    vowel_ms = [1000 * vowel for vowel, _ in prinfo]
    cons_ms = [1000 * cons for _, cons in prinfo]

    fig = plt.figure(figsize=(6, 5))

    plt.xlim([0.0, _axis_top(vowel_ms)])
    plt.ylim([0.0, _axis_top(cons_ms)])
    plt.scatter(vowel_ms, cons_ms)
    # Reference diagonal where vowel and consonant durations are equal.
    plt.axline((0, 0), slope=1, color="darkgray")

    plt.xlabel("Vowel length (ms)")
    plt.ylabel("Consonant length (ms)")
    heading = f'{kwd.upper()}\nV/C duration ratio: {round(rto,2)}'
    plt.title(heading)

    return (rto, fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
def runan(kwd, spl, aln, vck, dat, csvs):
    """Run the vowel/consonant duration analysis for one keyword.

    Args:
        kwd: Keyword to analyse (lower-cased before use).
        spl: Data split (lower-cased). ``'l1'`` keeps rows whose column 8
            equals ``'icelandic'`` ignoring case, ``'l2'`` keeps the other
            rows, and any other value keeps every row.
        aln: Aligner name (lower-cased); first-level key into ``vck``.
        vck: Nested key ``{aligner: {keyword: {spelling: (v_idx, c_idx)}}}``.
        dat: Metadata rows passed on to ``get_vc_dur``.
        csvs: Alignment CSV mapping passed on to ``get_vc_dur``.

    Returns:
        The string ``"EXCLUDED WORD FOR THIS ALIGNMENT TYPE"`` when any key
        entry for the word contains the ``('X',)`` marker; otherwise the
        figure from ``displ`` when more than five duration pairs were found,
        or ``0`` when there were five or fewer.
    """
    kwd, spl, aln = kwd.lower(), spl.lower(), aln.lower()
    print(kwd, aln, spl)
    vcd = vck[aln][kwd]

    excluded_marker = tuple('X')
    if any(excluded_marker in pair for pair in vcd.values()):
        return "EXCLUDED WORD FOR THIS ALIGNMENT TYPE"

    if spl == 'l1':
        rows = [row for row in dat if row[8].lower() == 'icelandic']
    elif spl == 'l2':
        rows = [row for row in dat if row[8].lower() != 'icelandic']
    else:
        rows = dat

    prinfo = get_vc_dur(kwd, aln, rows, vcd, csvs)
    if len(prinfo) <= 5:
        return 0

    _, figure = displ(prinfo, kwd)
    return figure
|
|
|
|
|
|
|
def setup(metadatas, phkey, align_csvs):
    """Load all inputs needed by the analysis.

    Args:
        metadatas: Iterable of paths to tab-separated metadata files. Each
            file's first line is skipped as a header and blank lines are
            ignored. The files are decoded as UTF-8 so that non-ASCII text
            does not depend on the platform locale (assumes the files are
            stored as UTF-8 - confirm for your data).
        phkey: Path to the phone-index key file, read by ``read_ph_key``.
        align_csvs: Path to a pickle file holding the alignment CSV mapping.

    Returns:
        ``(dat, vck, kws, csvs)`` where ``dat`` is the concatenation of all
        metadata rows (each a list of column strings whose last column has
        been split on single spaces into a tuple), ``vck`` is the key from
        ``read_ph_key``, ``kws`` is the sorted list of keywords under
        ``vck['ctc']`` and ``csvs`` is the unpickled object.
    """
    def _load_metadata(path):
        with open(path, 'r', encoding='utf-8') as handle:
            lines = handle.read().splitlines()

        rows = []
        for line in lines[1:]:
            if not line.strip():
                # An empty line would yield a one-element row and break
                # later positional column lookups.
                continue
            cols = line.split('\t')
            rows.append(cols[:-1] + [tuple(cols[-1].split(' '))])
        return rows

    dat = []
    for path in metadatas:
        dat.extend(_load_metadata(path))

    vck = read_ph_key(phkey)
    kws = sorted(vck['ctc'])

    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising. Only point align_csvs at a file from a trusted source.
    with open(align_csvs, 'rb') as handle:
        csvs = pickle.load(handle)

    return dat, vck, kws, csvs
|
|
|
|
|
|
|
|