Malaysian Parler TTS Tiny V1
Finetuned https://huggingface.co/parler-tts/parler-tts-tiny-v1 on Mesolitica/TTS
Source code at https://github.com/mesolitica/malaya-speech/tree/master/session/parler-tts
Wandb at https://wandb.ai/huseinzol05/malaysian-parler-tts-tiny-v1
Requirements:
pip3 install git+https://github.com/malaysia-ai/async-parler-tts
How to use:
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
from pypinyin import lazy_pinyin, Style
import soundfile as sf
import malaya
import jieba

# Malay text normalizer (numbers, abbreviations, currencies, ...) from malaya.
normalizer = malaya.normalize.normalizer()
# Load jieba's dictionary eagerly so the first cut() call in the hot path is not slow.
jieba.initialize()
def is_chinese(c):
    """Return True when character *c* falls in U+3100..U+9FFF.

    Heuristic CJK test: the range covers the CJK Unified Ideographs block
    plus Bopomofo/Kana-adjacent blocks used by this preprocessing code.
    """
    return not (c < "\u3100" or c > "\u9fff")
def convert_char_to_pinyin(text_list, polyphone=True):
    """Convert each string in *text_list* to a token list where CJK characters
    are replaced by their pinyin (TONE3 style with tone sandhi).

    Ported from F5-TTS text preprocessing. Pure-ASCII segments pass through
    unchanged; pure-CJK segments are transliterated as a whole; mixed segments
    are handled character by character.

    Args:
        text_list: iterable of input strings.
        polyphone: when True, resolve polyphonic CJK characters with
            tone-sandhi-aware pinyin.

    Returns:
        list[list[str]]: one list of output tokens per input string.
    """
    final_text_list = []
    # Map curly quotes and full-width Chinese punctuation to ASCII so they do
    # not confuse downstream tokenization. (The source of this snippet was
    # mojibake'd — duplicate garbled keys; characters restored to the intended
    # ones from the upstream F5-TTS implementation.)
    custom_trans = str.maketrans(
        {
            ";": ",",
            "\u201c": '"',   # left curly double quote
            "\u201d": '"',   # right curly double quote
            "\u2018": "'",   # left curly single quote
            "\u2019": "'",   # right curly single quote
            "\uff0c": ", ",  # full-width comma
            "\uff01": ". ",  # full-width exclamation mark
            "\u3002": ". ",  # ideographic full stop
        }
    )
    for text in text_list:
        char_list = []
        text = text.translate(custom_trans)
        for seg in jieba.cut(text):
            seg_byte_len = len(bytes(seg, "UTF-8"))
            if seg_byte_len == len(seg):  # pure ASCII: alphabets and symbols
                # Insert a separating space between adjacent word-like segments.
                if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
                    char_list.append(" ")
                char_list.extend(seg)
            elif polyphone and seg_byte_len == 3 * len(seg):  # pure east asian characters
                seg_ = lazy_pinyin(seg, style=Style.TONE3, tone_sandhi=True)
                for i, c in enumerate(seg):
                    if is_chinese(c):
                        char_list.append(" ")
                    char_list.append(seg_[i])
            else:  # mixed characters, alphabets and symbols
                for c in seg:
                    if ord(c) < 256:
                        char_list.extend(c)
                    elif is_chinese(c):
                        char_list.append(" ")
                        char_list.extend(lazy_pinyin(c, style=Style.TONE3, tone_sandhi=True))
                    else:
                        char_list.append(c)
        final_text_list.append(char_list)
    return final_text_list
def normalize(text):
    """Normalize *text* into a single space-joined string whose CJK characters
    have been replaced by pinyin tokens via convert_char_to_pinyin()."""
    token_lists = convert_char_to_pinyin(text.split())
    joined = (''.join(tokens) for tokens in token_lists)
    return ' '.join(joined).strip()
# Run generation on GPU when available; CPU works but is slow.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# NOTE(review): the card title says "tiny-v1" but the checkpoint id says
# "mini-v1" — confirm which repository is intended.
model = ParlerTTSForConditionalGeneration.from_pretrained("mesolitica/malaysian-parler-tts-mini-v1").to(device)
tokenizer = AutoTokenizer.from_pretrained("mesolitica/malaysian-parler-tts-mini-v1")

# Speakers the finetune was conditioned on; the description prompt is just
# the speaker name.
speakers = [
    'Husein',
    'Shafiqah Idayu',
    'Anwar Ibrahim',
    'KP'
]

# Also support context switching
# NOTE(review): the Chinese portion of this literal is mojibake'd in the
# source page — restore the original UTF-8 text before running.
prompt = 'Husein zolkepli sangat comel dan kacak suka makan cendol. εΆε½’ζηι¨εΊζ°εεθ₯ιγ, AI, ζ°εεζε‘γζ°εεγη¨ζ·ζ°εεηζ°εεζηδ½η³»'
prompt = normalizer.normalize(prompt)  # malaya normalization; returns a dict
prompt = normalize(prompt['normalize'])  # CJK -> pinyin token string

for s in speakers:
    description = s
    # Voice description goes through `input_ids`; the text to speak goes
    # through `prompt_input_ids` (parler-tts convention).
    input_ids = tokenizer(description, return_tensors="pt").to(device)
    prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    generation = model.generate(
        input_ids=input_ids.input_ids,
        attention_mask=input_ids.attention_mask,
        prompt_input_ids=prompt_input_ids.input_ids,
        prompt_attention_mask=prompt_input_ids.attention_mask,
    )
    audio_arr = generation.cpu()
    # NOTE(review): sample rate is hard-coded to 44100 — confirm it matches
    # model.config.sampling_rate. Writing .mp3 through soundfile requires
    # libsndfile >= 1.1; otherwise use a .wav extension.
    sf.write(f'{s}.mp3', audio_arr.numpy().squeeze(), 44100)
- Downloads last month
- 22
Inference Providers
NEW
This model is not currently available via any of the supported third-party Inference Providers, and
the model is not deployed on the HF Inference API.