import os

import numpy as np
import torch
from scipy.io import wavfile
from TTS.api import TTS

# Accept Coqui's terms of service non-interactively so the model download
# does not block on a prompt.
os.environ["COQUI_TOS_AGREED"] = "1"

# PyTorch >= 2.6 changed torch.load's default to weights_only=True, which
# breaks loading XTTS checkpoints (they pickle full config objects). Patch
# torch.load to default weights_only=False unless the caller sets it.
# NOTE(security): weights_only=False runs arbitrary pickle code on load —
# acceptable here only because checkpoints come from the trusted Coqui hub.
_original_torch_load = torch.load


def custom_torch_load(*args, **kwargs):
    """torch.load wrapper that defaults ``weights_only`` to False."""
    kwargs.setdefault("weights_only", False)
    return _original_torch_load(*args, **kwargs)


# Override torch.load globally so TTS's internal checkpoint loads use it.
torch.load = custom_torch_load

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")


def create_audiobook(english_transcript, speaker_list):
    """Synthesize one continuous waveform from a list of transcript segments.

    Args:
        english_transcript: iterable of dicts with keys ``'char_id'`` (index
            into ``speaker_list``, or ``None`` for unattributed/narrator text)
            and ``'text'`` (the sentence to speak).
        speaker_list: list of XTTS speaker names (underscores in place of
            spaces); the last entry is the fallback voice used when
            ``'char_id'`` is ``None``.

    Returns:
        Tuple ``(combined, sample_rate)``: the concatenated waveform as a
        numpy array and the synthesizer's output sample rate.

    Raises:
        RuntimeError: if no segment could be synthesized at all.
    """
    parent_wav = []
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

    for segment in english_transcript:
        print(segment['char_id'], segment['text'])

        # Pick the character's voice; fall back to the narrator voice
        # (last list entry) when the segment has no character attribution.
        if segment['char_id'] is not None:
            voice_speaker = speaker_list[segment['char_id']]
        else:
            voice_speaker = speaker_list[-1]
        print("voice person", voice_speaker)

        try:
            # XTTS speaker names use spaces where our list uses underscores.
            wav = tts.tts(
                text=segment['text'],
                speaker=voice_speaker.replace('_', ' '),
                language="en",
            )
            print("✓ Audio Generated")
            parent_wav.append(wav)
        except Exception as e:
            # Best-effort: skip a failed segment rather than abort the book.
            print(f"✗ Error with : {e}")

    if not parent_wav:
        # np.concatenate([]) raises an opaque ValueError; fail with a clear
        # message instead when every segment failed to synthesize.
        raise RuntimeError("No audio segments were generated")

    combined = np.concatenate(parent_wav)
    return combined, tts.synthesizer.output_sample_rate