LisaTTS / text /cleaners.py
kdrkdrkdr's picture
add files
227479d
raw
history blame contribute delete
No virus
1.54 kB
import re
def japanese_cleaners(text):
    """Convert *text* with ``japanese_to_romaji_with_accent`` and end it cleanly.

    A '.' is appended when the converted string is empty or when its last
    character is an ASCII letter; otherwise the converted string is
    returned unchanged.

    Args:
        text: Input string handed to ``japanese_to_romaji_with_accent``.

    Returns:
        The converted string, with a trailing '.' added when required.
    """
    # Imported at call time, so text.japanese is only loaded when this
    # cleaner is actually used.
    from text.japanese import japanese_to_romaji_with_accent

    romaji = japanese_to_romaji_with_accent(text)
    needs_period = not romaji or re.match('[A-Za-z]', romaji[-1])
    return romaji + '.' if needs_period else romaji
def japanese_cleaners2(text):
    """Apply ``japanese_cleaners`` plus extra symbol normalisation.

    Before conversion, '・・・' becomes '…' and any remaining '・' becomes a
    space. After conversion, 'ts' becomes 'ʦ', '...' becomes '…', the
    characters ``( ) [ ] { }`` are removed and '*' becomes a space.

    Args:
        text: Input string.

    Returns:
        The cleaned string.
    """
    # The triple dot must be handled first, otherwise the single-dot rule
    # would turn it into three spaces.
    prepared = text.replace('・・・', '…')
    prepared = prepared.replace('・', ' ')

    cleaned = japanese_cleaners(prepared)
    cleaned = cleaned.replace('ts', 'ʦ')
    cleaned = cleaned.replace('...', '…')

    # Single pass: '*' -> ' ', every bracket/brace character is deleted.
    # This runs after the '...' replacement, so dots that only become
    # adjacent once brackets are removed are left as they are.
    symbol_table = str.maketrans('*', ' ', '()[]{}')
    return cleaned.translate(symbol_table)
def ko2kata(text: str) -> str:
    """Return *text* unchanged.

    NOTE(review): the name suggests a Korean-to-katakana conversion, but
    no conversion is implemented here; the input is passed straight
    through.
    """
    return text
def en2kata(text: str) -> str:
    """Return *text* unchanged.

    NOTE(review): the name suggests an English-to-katakana conversion, but
    no conversion is implemented here; the input is passed straight
    through.
    """
    return text
def jke_cleaners(text):
    """Clean text containing ``[JA]``/``[KO]``/``[EN]``-tagged segments.

    Every ``[JA]...[JA]``, ``[KO]...[KO]`` and ``[EN]...[EN]`` span has its
    tags stripped and is replaced by its content followed by one space.
    Korean and English content is first passed through ``ko2kata`` and
    ``en2kata`` respectively. The whole string is then run through
    ``japanese_cleaners2``, its final character is dropped, and a '.' is
    appended when the result is empty or does not already end in one of
    ``. , ! ? - … ~``.

    Args:
        text: Input string, optionally containing tagged segments.

    Returns:
        The cleaned string; it is never empty.
    """
    # Collect all tagged spans from the original text up front, before any
    # replacement can alter what the later patterns would match.
    tagged_spans = (
        (re.findall(r'\[JA\].*?\[JA\]', text), None),
        (re.findall(r'\[KO\].*?\[KO\]', text), ko2kata),
        (re.findall(r'\[EN\].*?\[EN\]', text), en2kata),
    )
    for spans, convert in tagged_spans:
        for span in spans:
            inner = span[4:-4]  # strip the 4-character tag at both ends
            if convert is not None:
                inner = convert(inner)
            # Replace only the first occurrence so repeated identical
            # spans are consumed one at a time.
            text = text.replace(span, inner + ' ', 1)

    text = japanese_cleaners2(text)
    # NOTE(review): presumably removes the trailing space added after the
    # last tagged segment -- confirm against japanese_to_romaji_with_accent.
    text = text[:-1]
    # Guard against an empty string: indexing text[-1] would raise
    # IndexError when the cleaned text was a single character.
    if not text or re.match(r'[^\.,!\?\-…~]', text[-1]):
        text += '.'
    return text