# NOTE(review): the bare name `transformers_js_py` below appears to be the
# contents of a requirements line (gradio-lite / Pyodide convention) pasted
# into the script; as a statement it raises NameError in standard Python,
# so it is preserved as a comment — confirm against the deployment setup.
# requirement: transformers_js_py
from transformers_js import pipeline

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile

# Pre-computed speaker-embedding vector (x-vector) that SpeechT5 uses to
# select the output voice.
speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'
async def synthesize(text):
    """Synthesize speech for *text* with SpeechT5 and return a WAV file path.

    Parameters
    ----------
    text : str
        Text to convert to speech.

    Returns
    -------
    str
        Path of the WAV file written to the working directory
        (always ``"output.wav"``; Gradio serves it as the audio output).
    """
    # Lazily create the pipeline on first use and cache it on the function
    # object. This keeps the original intent (fast first render of the app,
    # per the original comment) while fixing the defect that the model was
    # re-initialized on every request.
    if not hasattr(synthesize, "_pipe"):
        synthesize._pipe = await pipeline(
            'text-to-speech',
            'Xenova/speecht5_tts',
            { "quantized": False }
        )
    out = await synthesize._pipe(text, { "speaker_embeddings": speaker_embeddings })
    # transformers.js hands back a memoryview of float32 PCM samples plus
    # the sampling rate; wrap it as a NumPy array without copying the data.
    audio_data = np.frombuffer(out["audio"], dtype=np.float32)
    # NOTE(review): a fixed filename means concurrent requests overwrite each
    # other's output — acceptable for a single-user demo; confirm for deploys.
    wavfile.write('output.wav', out["sampling_rate"], audio_data)
    return "output.wav"
# Wire the synthesizer into a minimal text-in / audio-out Gradio UI and
# start serving it.
demo = gr.Interface(fn=synthesize, inputs="textbox", outputs="audio")
demo.launch()