Spaces:
Running
Running
Replace kanjiconv dependency with pykakasi for broader Python compatibility
Browse files
- modules/utils/g2p.py +6 -4
modules/utils/g2p.py
CHANGED
|
@@ -3,12 +3,12 @@ import re
|
|
| 3 |
import warnings
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
-
|
| 7 |
from pypinyin import lazy_pinyin
|
| 8 |
|
| 9 |
from .resources.pinyin_dict import PINYIN_DICT
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
yoon_map = {
|
| 14 |
"ぁ": "あ",
|
|
@@ -32,9 +32,9 @@ for plan in ace_phonemes_all_plans["plans"]:
|
|
| 32 |
|
| 33 |
|
| 34 |
def preprocess_text(text: str, language: str) -> list[str]:
|
| 35 |
-
text = text.replace(" ", "")
|
| 36 |
if language == "mandarin":
|
| 37 |
text_list = to_pinyin(text)
|
|
|
|
| 38 |
elif language == "japanese":
|
| 39 |
text_list = to_kana(text)
|
| 40 |
else:
|
|
@@ -117,7 +117,9 @@ def replace_chouonpu(hiragana_text: str) -> str:
|
|
| 117 |
|
| 118 |
|
| 119 |
def to_kana(text: str) -> list[str]:
|
| 120 |
-
hiragana_text =
|
|
|
|
|
|
|
| 121 |
hiragana_text_wl = replace_chouonpu(hiragana_text).split(" ")
|
| 122 |
final_ls = []
|
| 123 |
for subword in hiragana_text_wl:
|
|
|
|
| 3 |
import warnings
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import pykakasi
|
| 7 |
from pypinyin import lazy_pinyin
|
| 8 |
|
| 9 |
from .resources.pinyin_dict import PINYIN_DICT
|
| 10 |
|
| 11 |
+
kks = pykakasi.kakasi()
|
| 12 |
|
| 13 |
yoon_map = {
|
| 14 |
"ぁ": "あ",
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def preprocess_text(text: str, language: str) -> list[str]:
|
|
|
|
| 35 |
if language == "mandarin":
|
| 36 |
text_list = to_pinyin(text)
|
| 37 |
+
text_list = [pinyin for pinyin in text_list if pinyin != " "]
|
| 38 |
elif language == "japanese":
|
| 39 |
text_list = to_kana(text)
|
| 40 |
else:
|
|
|
|
| 117 |
|
| 118 |
|
| 119 |
def to_kana(text: str) -> list[str]:
|
| 120 |
+
hiragana_text = "".join(
|
| 121 |
+
[item["hira"] for item in kks.convert(text.replace(" ", ""))]
|
| 122 |
+
)
|
| 123 |
hiragana_text_wl = replace_chouonpu(hiragana_text).split(" ")
|
| 124 |
final_ls = []
|
| 125 |
for subword in hiragana_text_wl:
|