jhansss committed on
Commit
bc5d466
·
1 Parent(s): 9f5df1a

Replace kanjiconv dependency with pykakasi for broader Python compatibility

Browse files
Files changed (1) hide show
  1. modules/utils/g2p.py +6 -4
modules/utils/g2p.py CHANGED
@@ -3,12 +3,12 @@ import re
3
  import warnings
4
  from pathlib import Path
5
 
6
- from kanjiconv import KanjiConv
7
  from pypinyin import lazy_pinyin
8
 
9
  from .resources.pinyin_dict import PINYIN_DICT
10
 
11
- kanji_to_kana = KanjiConv()
12
 
13
  yoon_map = {
14
  "ぁ": "あ",
@@ -32,9 +32,9 @@ for plan in ace_phonemes_all_plans["plans"]:
32
 
33
 
34
  def preprocess_text(text: str, language: str) -> list[str]:
35
- text = text.replace(" ", "")
36
  if language == "mandarin":
37
  text_list = to_pinyin(text)
 
38
  elif language == "japanese":
39
  text_list = to_kana(text)
40
  else:
@@ -117,7 +117,9 @@ def replace_chouonpu(hiragana_text: str) -> str:
117
 
118
 
119
  def to_kana(text: str) -> list[str]:
120
- hiragana_text = kanji_to_kana.to_hiragana(text.replace(" ", ""))
 
 
121
  hiragana_text_wl = replace_chouonpu(hiragana_text).split(" ")
122
  final_ls = []
123
  for subword in hiragana_text_wl:
 
3
  import warnings
4
  from pathlib import Path
5
 
6
+ import pykakasi
7
  from pypinyin import lazy_pinyin
8
 
9
  from .resources.pinyin_dict import PINYIN_DICT
10
 
11
+ kks = pykakasi.kakasi()
12
 
13
  yoon_map = {
14
  "ぁ": "あ",
 
32
 
33
 
34
  def preprocess_text(text: str, language: str) -> list[str]:
 
35
  if language == "mandarin":
36
  text_list = to_pinyin(text)
37
+ text_list = [pinyin for pinyin in text_list if pinyin != " "]
38
  elif language == "japanese":
39
  text_list = to_kana(text)
40
  else:
 
117
 
118
 
119
  def to_kana(text: str) -> list[str]:
120
+ hiragana_text = "".join(
121
+ [item["hira"] for item in kks.convert(text.replace(" ", ""))]
122
+ )
123
  hiragana_text_wl = replace_chouonpu(hiragana_text).split(" ")
124
  final_ls = []
125
  for subword in hiragana_text_wl: