# requirements: transformers_js_py
# (the bare token above was fused into the source from a gradio-lite
#  requirements block; kept here as a comment so the file stays valid Python)
"""Gradio text-to-speech demo backed by transformers.js (via transformers_js_py).

Synthesizes speech from user text with the Xenova/speecht5_tts model and
returns a WAV file path that the Gradio "audio" output component plays back.
"""
from transformers_js import pipeline
import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile

# Pre-computed speaker-embedding vector required by SpeechT5; passed by URL
# so transformers.js fetches it on demand.
speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'


async def synthesize(text):
    """Generate speech for *text* and return the path of the written WAV file.

    Args:
        text: The input string to vocalize.

    Returns:
        The filename ("output.wav") of the rendered audio, for Gradio to play.
    """
    # Put the pipeline initializer inside the function to show the first
    # view of the app faster (the model download only starts on first use).
    synthesizer = await pipeline(
        'text-to-speech',
        'Xenova/speecht5_tts',
        {"quantized": False},
    )
    out = await synthesizer(text, {"speaker_embeddings": speaker_embeddings})
    # out["audio"] is a memoryview over raw float32 PCM samples — wrap it
    # in a NumPy array so scipy can serialize it.
    audio_data_memory_view = out["audio"]
    sampling_rate = out["sampling_rate"]
    audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)
    wavfile.write('output.wav', sampling_rate, audio_data)
    return "output.wav"


demo = gr.Interface(synthesize, "textbox", "audio")

demo.launch()