import os

import numpy as np
import torch
from scipy.io import wavfile

from TTS.api import TTS


# Newer PyTorch releases (2.6+) default torch.load(weights_only=True), which
# can break unpickling the XTTS checkpoint. Wrap torch.load so it falls back
# to weights_only=False unless the caller sets it explicitly.
_original_torch_load = torch.load


def custom_torch_load(*args, **kwargs):
    if "weights_only" not in kwargs:
        kwargs["weights_only"] = False
    return _original_torch_load(*args, **kwargs)


torch.load = custom_torch_load

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")


def create_audiobook(gender_list, english_transcript, speaker_list):
    """Synthesize each transcript segment with XTTS v2 and concatenate the audio.

    Each entry in english_transcript is a dict with 'text' and 'char_id' keys.
    char_id indexes into gender_list and speaker_list; a char_id of None falls
    back to the last entry in speaker_list.
    """
    parent_wav = []
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

    for text in english_transcript:
        print(text['char_id'], text['text'])

        # Pick the speaker (and gender, for logging) for this segment.
        voice_gender = 'Male'
        if text['char_id'] is not None:
            voice_gender = gender_list[int(text['char_id'])][0]
            voice_speaker = speaker_list[text['char_id']]
        else:
            voice_speaker = speaker_list[-1]
        print("voice person", voice_speaker)
        print("Voice Gender", voice_gender)

        try:
            wav = tts.tts(
                text=text['text'],
                speaker=voice_speaker.replace('_', ' '),
                language="en"
            )
            print("✓ Audio Generated")
            parent_wav.append(wav)
        except Exception as e:
            print(f"✗ Error: {e}")

    # Join all generated segments into a single waveform.
    combined = np.concatenate(parent_wav)
    return combined, tts.synthesizer.output_sample_rate
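

# Usage sketch. The transcript, gender, and speaker structures below are
# illustrative assumptions, and "Claribel_Dervla" / "Ana_Florence" are assumed
# to be valid built-in XTTS v2 speaker names; substitute your own data.
if __name__ == "__main__":
    english_transcript = [
        {'char_id': 0, 'text': "Hello there, how are you today?"},
        {'char_id': None, 'text': "said the old woman, smiling."},
    ]
    speaker_list = ["Claribel_Dervla", "Ana_Florence"]  # last entry handles char_id=None
    gender_list = [("Female",), ("Female",)]

    audio, sample_rate = create_audiobook(gender_list, english_transcript, speaker_list)

    # Save the concatenated waveform as a 16-bit WAV in the working directory.
    out_path = os.path.join(os.getcwd(), "audiobook.wav")
    wavfile.write(out_path, sample_rate, (np.array(audio) * 32767).astype(np.int16))
    print(f"Saved {out_path}")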