"""Gradio app that clones a voice from a reference clip using XTTS v2."""

import os
import subprocess
import sys

import spaces

# Run the setup.py install command.
# The install runs before the remaining imports, as in the original script
# (presumably it provides the local TTS package imported below -- confirm).
# sys.executable guarantees the package is installed for the interpreter that
# is actually running this script, rather than whatever "python" is on PATH.
try:
    subprocess.run(
        [sys.executable, "setup.py", "install", "--user"],
        check=True,
    )
    print("Installation successful.")
except subprocess.CalledProcessError as e:
    # Deliberately best-effort: report the failure and keep going.
    print(f"Installation failed with error: {e}")

import gradio as gr
import torch
from TTS.api import TTS

# Language codes offered in the UI.
SUPPORTED_LANGUAGES = [
    "ru", "en", "zh-cn", "ja", "de", "fr", "it", "pt",
    "pl", "tr", "ko", "nl", "cs", "ar", "es", "hu",
]

# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize TTS model globally but load it inside the GPU-decorated function
tts = None


@spaces.GPU(duration=120)  # Voice cloning can take longer than default 60s
def initialize_tts():
    """Load the XTTS v2 model onto ``device`` on first use and return it.

    The model is cached in the module-level ``tts`` variable, so subsequent
    calls return the already-loaded instance.
    """
    global tts
    if tts is None:
        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
    return tts


@spaces.GPU(duration=120)
def voice_clone(text: str, speaker_wav: str, language: str):
    """Synthesize ``text`` in the voice of ``speaker_wav``.

    Args:
        text: Text to speak.
        speaker_wav: File path of the reference voice sample.
        language: Language code, one of ``SUPPORTED_LANGUAGES``.

    Returns:
        Path of the generated WAV file (``outputs/output.wav``).

    Raises:
        gr.Error: If the text is blank, no reference audio was supplied, or
            no language was selected.
    """
    # Validate up front: the Radio has no default and the Audio/Textbox can be
    # left empty, which would otherwise surface as an opaque backend error.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a voice sample.")
    if not language:
        raise gr.Error("Please select a language.")

    # Initialize TTS if not already done.
    # NOTE(review): this calls one @spaces.GPU function from inside another;
    # confirm that nesting behaves as intended on the target runtime.
    model = initialize_tts()

    # Create output directory if it doesn't exist
    os.makedirs("outputs", exist_ok=True)
    output_path = os.path.join("outputs", "output.wav")

    # Run TTS
    print("Speaker wav:", speaker_wav)
    model.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path


# Create Gradio interface
iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
        gr.Audio(type="filepath", label="Upload audio file"),
        gr.Radio(SUPPORTED_LANGUAGES, label="language"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated audio file"),
    title="Voice Cloning",
    description="Upload a voice sample and enter text to clone the voice. Processing may take 1-2 minutes.",
)

# Launch with queue enabled for better handling of GPU resources
iface.queue().launch()