The models from https://github.com/taylorchu/2cent-tts, converted to the .safetensors format. Below is example inference code:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the 25M-parameter 2cent-tts model and its matching tokenizer from the Hub.
model = AutoModelForCausalLM.from_pretrained(
"timBoML/2cent-tts-25m"
)
tokenizer = AutoTokenizer.from_pretrained("timBoML/2cent-tts-25m")
# Input is IPA phonemes (produced externally with espeak-ng), not raw text.
phones = "həlˈoʊ aɪɐm tˈuː sˈɛnt tˌiːtˌiːˈɛs" # using espeak-ng
# Prompt layout: phoneme ids, then "<s>", then the hard-coded id 4136
# (presumably a start-of-audio marker — TODO confirm against the model's
# tokenizer config).
input_ids = (
tokenizer.encode(phones, add_special_tokens=False)
+ tokenizer.encode("<s>", add_special_tokens=False)
+ [4136]
)
# Add a batch dimension: shape (1, prompt_len).
input_ids = torch.tensor(input_ids).unsqueeze(0)
generated_ids = model.generate(
input_ids=input_ids,
max_new_tokens=2048,
)
# Drop the batch dimension, then slice off the prompt so only the
# newly generated audio tokens remain.
generated_ids = generated_ids.squeeze()
tokens = generated_ids[input_ids.shape[1]:]
# Shift the generated ids into the SNAC codebook range by subtracting
# the vocabulary id of the first audio token ("<audio_0>").
first_audio_token = tokenizer.encode("<audio_0>")[-1]
tokens = tokens - first_audio_token
import locale
# NOTE(review): torchaudio.transforms and os appear unused in this script.
import torchaudio.transforms as T
import os
import torch
from snac import SNAC
# Force UTF-8 as the preferred encoding — works around environments
# (e.g. Colab) where a non-UTF-8 locale breaks file downloads.
locale.getpreferredencoding = lambda: "UTF-8"
# SNAC neural audio codec; decodes hierarchical code sequences to 24 kHz audio.
snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
def redistribute_codes(code_list, model=None):
    """Reshape a flat 7-codes-per-frame sequence into SNAC's 3 layers and decode.

    The generator emits 7 codes per audio frame, interleaved as
    [L1, L2, L3, L3, L2, L3, L3]. This splits them back into the three
    SNAC codebook layers (1, 2 and 4 codes per frame respectively) and
    runs the codec decoder.

    Args:
        code_list: flat sequence (list or 1-D tensor) of codebook indices,
            already shifted into the SNAC code range.
        model: codec with a ``decode(codes)`` method; defaults to the
            module-level ``snac_model``.

    Returns:
        The decoded audio tensor from ``model.decode``.
    """
    if model is None:
        model = snac_model
    layer_1 = []
    layer_2 = []
    layer_3 = []
    # Use floor division so a trailing partial frame is dropped.
    # (The original `(len(code_list)+1)//7` raised IndexError whenever
    # len(code_list) % 7 == 6, since the last iteration read 7*i+6
    # past the end of the sequence.)
    for i in range(len(code_list) // 7):
        base = 7 * i
        layer_1.append(code_list[base])
        layer_2.append(code_list[base + 1])
        layer_3.append(code_list[base + 2])
        layer_3.append(code_list[base + 3])
        layer_2.append(code_list[base + 4])
        layer_3.append(code_list[base + 5])
        layer_3.append(code_list[base + 6])
    # SNAC expects a list of three (1, n_codes) tensors.
    codes = [torch.tensor(layer_1).unsqueeze(0),
             torch.tensor(layer_2).unsqueeze(0),
             torch.tensor(layer_3).unsqueeze(0)]
    audio_hat = model.decode(codes)
    return audio_hat
# Decode the generated SNAC codes into a waveform tensor.
sample = redistribute_codes(tokens)
from IPython.display import Audio, display
# Play the decoded audio inline at 24 kHz (Jupyter/Colab only).
display(Audio(sample.detach().squeeze().to("cpu").numpy(), rate=24000))
- Downloads last month
- 29
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support