| import re | |
| from text.japanese import japanese_to_romaji_with_accent | |
| from text.k2j import korean2katakana | |
| from text.symbols import symbols | |
# Matches any single character that belongs to the symbol inventory; the
# cleaners use it to keep only those characters. Each symbol is passed through
# re.escape so that characters such as '-', '^', ']' or '\' are taken
# literally instead of forming a range, negating the class, or closing it
# early. Symbols are joined with no separator so that no extra character
# leaks into the allowed set.
_cleaner_cleans = re.compile('[' + ''.join(re.escape(s) for s in symbols) + ']')
def japanese_cleaners(text):
    """Convert Japanese text to accented romaji and tidy the result.

    The input is passed to ``japanese_to_romaji_with_accent``; afterwards:

    * a period is appended when the string ends in an ASCII letter,
    * every ``ts`` is replaced by the single character ``ʦ``,
    * every run of three periods is replaced by the ellipsis ``…``.

    Returns the resulting string.
    """
    romaji = japanese_to_romaji_with_accent(text)
    # Close the utterance with '.' if its last character is an ASCII letter.
    terminated = re.sub(r'([A-Za-z])$', r'\1.', romaji)
    single_affricate = terminated.replace('ts', 'ʦ')
    return single_affricate.replace('...', '…')
def japanese_cleaners2(text):
    """Clean text containing ``[KO]…[KO]`` and ``[JA]…[JA]`` tagged segments.

    Steps, in order:

    1. Each ``[KO]…[KO]`` segment is run through ``korean2katakana`` and
       re-wrapped as ``[JA]<result>.[JA]``.
    2. Each ``[JA]…[JA]`` segment (including those produced by step 1) is
       replaced by ``japanese_cleaners(<segment>)`` followed by a space.
    3. Only characters matched by ``_cleaner_cleans`` are kept, and spaces
       are then removed.

    Returns the cleaned string.
    """
    def _korean_to_tagged_japanese(match):
        # Re-tag as Japanese so the next substitution picks the segment up.
        return '[JA]' + korean2katakana(match.group(1)) + '.[JA]'

    def _clean_japanese_segment(match):
        return japanese_cleaners(match.group(1)) + ' '

    with_korean_converted = re.sub(
        r'\[KO\](.*?)\[KO\]', _korean_to_tagged_japanese, text
    )
    with_japanese_cleaned = re.sub(
        r'\[JA\](.*?)\[JA\]', _clean_japanese_segment, with_korean_converted
    )
    kept_characters = _cleaner_cleans.findall(with_japanese_cleaned)
    return ''.join(kept_characters).replace(' ', '')