# Sonofica / utils / create_audiobook.py
# janmayjay's picture
# Add application file
# 39a7537
# raw
# history blame
# 1.79 kB
import torch
from TTS.api import TTS
import os
import numpy as np
from scipy.io import wavfile
# Keep a handle on the stock loader before it is replaced below.
_original_torch_load = torch.load


def custom_torch_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` defaulting to False.

    All positional and keyword arguments are forwarded unchanged. Only when
    the caller did not pass ``weights_only`` is it filled in with ``False``;
    an explicit value from the caller is respected.

    NOTE: with ``weights_only=False`` ``torch.load`` performs full pickle
    unpickling, so checkpoints should only come from trusted sources.
    """
    kwargs.setdefault("weights_only", False)
    return _original_torch_load(*args, **kwargs)


# Monkeypatch: any later ``torch.load`` call in this process goes through
# the wrapper above.
torch.load = custom_torch_load
# Run on the GPU when torch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")
# generate_samples()
def create_audiobook(gender_list, english_transcript, speaker_list):
    """Synthesize each transcript entry with XTTS v2 and join the audio.

    Args:
        gender_list: Indexed by the integer character id. Only element ``[0]``
            of the selected entry is read, and it is only printed.
        english_transcript: Iterable of mappings, each with a ``'char_id'``
            key (``None`` when no character is assigned) and a ``'text'``
            key holding the text to speak.
        speaker_list: Sequence of speaker names indexed by the integer
            character id. The last element is used when ``char_id`` is
            ``None``. Underscores in the name are replaced with spaces
            before it is handed to the model.

    Returns:
        A tuple ``(combined, sample_rate)`` where ``combined`` is the
        ``np.concatenate`` of every successfully synthesized segment, in
        transcript order, and ``sample_rate`` is
        ``tts.synthesizer.output_sample_rate``.

    Raises:
        ValueError: If no segment was synthesized (empty transcript, or
            every synthesis call failed).
    """
    # NOTE(review): the model is loaded on every call; callers that invoke
    # this repeatedly may want to cache it — left as-is to keep behaviour.
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
    segments = []

    for entry in english_transcript:
        char_id = entry['char_id']
        print(char_id, entry['text'])

        voice_gender = 'Male'
        if char_id is not None:
            # Convert once so both lookups use the same integer index; the
            # original only converted for gender_list, so a string/float id
            # worked for one list and raised TypeError for the other.
            speaker_index = int(char_id)
            voice_gender = gender_list[speaker_index][0]
            voice_speaker = speaker_list[speaker_index]
        else:
            # No character assigned: fall back to the last speaker entry.
            voice_speaker = speaker_list[-1]
        print("voice person", voice_speaker)
        print("Voice Gender", voice_gender)

        try:
            wav = tts.tts(
                text=entry['text'],
                speaker=voice_speaker.replace('_', ' '),
                language="en",
            )
        except Exception as e:
            # Best effort: a failed segment is reported and skipped so the
            # remaining entries are still synthesized.
            print(f"✗ Error with : {e}")
        else:
            print("✓ Audio Generated")
            segments.append(wav)

    if not segments:
        # np.concatenate([]) would raise an opaque ValueError; raise the same
        # exception type with a message that explains what happened.
        raise ValueError(
            "No audio segments were generated; nothing to concatenate."
        )

    combined = np.concatenate(segments)
    return combined, tts.synthesizer.output_sample_rate
# parent_wav = generate_audio(gender_list,english_transcript,tts)