Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,047 Bytes
a5c86e8 802577d a5c86e8 1d2e2ec 5457abc 1d2e2ec a5c86e8 9b2107c 9ec632b 9b2107c 802577d 9b2107c 802577d 9b2107c 802577d 9b2107c 802577d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
import subprocess
import sys

import spaces
# Run the setup.py install command.
# The interpreter executing this script (sys.executable) is used instead of a
# bare "python", so the install targets the same environment that performs the
# imports further down rather than whichever "python" happens to be first on
# PATH. Fall back to "python" only if sys.executable is empty.
try:
    subprocess.run(
        [sys.executable or "python", "setup.py", "install", "--user"],
        check=True,
    )
except subprocess.CalledProcessError as e:
    # Best effort: report the failure and continue with the rest of the script.
    print(f"Installation failed with error: {e}")
else:
    print("Installation successful.")
import gradio as gr
import torch
from TTS.api import TTS
# Select the torch device string: "cuda" when torch reports CUDA as available,
# otherwise "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Module-level slot for the TTS model. It starts out empty; the model itself is
# created later, inside the GPU-decorated function.
tts = None
@spaces.GPU(duration=120)  # Voice cloning can take longer than default 60s
def initialize_tts():
    """Return the module-level TTS model, creating it on the first call.

    On first use the ``tts_models/multilingual/multi-dataset/xtts_v2`` model is
    constructed, moved to ``device``, and stored in the global ``tts``. Later
    calls return the stored object without rebuilding it.
    """
    global tts

    # Already built: hand back the cached instance.
    if tts is not None:
        return tts

    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
    return tts
@spaces.GPU(duration=120)
def voice_clone(text: str, speaker_wav: str, language: str):
    """Synthesize ``text`` in the voice of ``speaker_wav`` and return the WAV path.

    Args:
        text: The text to speak.
        speaker_wav: File path of the reference voice sample.
        language: Language code forwarded to the TTS model.

    Returns:
        Path of the generated audio file (``outputs/output.wav``).

    Raises:
        gr.Error: If the text is blank, no voice sample was supplied, or no
            language was selected.
    """
    global tts

    # Reject incomplete submissions up front with a readable message instead of
    # letting a missing value surface as an opaque error from the TTS backend.
    # NOTE(review): assumes Gradio passes None for an empty audio input and an
    # unselected radio group - confirm against the installed Gradio version.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a voice sample.")
    if not language:
        raise gr.Error("Please select a language.")

    # Initialize TTS if not already done
    if tts is None:
        tts = initialize_tts()

    # Create output directory if it doesn't exist
    os.makedirs("outputs", exist_ok=True)
    # The output path is fixed, so every call overwrites the previous result.
    output_path = os.path.join("outputs", "output.wav")

    # Run TTS
    print("Speaker wav:", speaker_wav)
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path
# Create Gradio interface.
# The three inputs are listed in the same order as voice_clone's parameters
# (text, speaker_wav, language).
iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
        # type="filepath" hands voice_clone a path on disk rather than raw samples.
        gr.Audio(type="filepath", label="Upload audio file"),
        gr.Radio(
            [
                'ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt',
                'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu',
            ],
            label="language",
        ),
    ],
    outputs=gr.Audio(type="filepath", label="Generated audio file"),
    title="Voice Cloning",
    description="Upload a voice sample and enter text to clone the voice. Processing may take 1-2 minutes.",
)

# Launch with queue enabled for better handling of GPU resources
iface.queue().launch()