transformers_js_py from transformers_js_py import import_transformers_js import gradio as gr import numpy as np transformers_js = await import_transformers_js("3.0.2") pipeline = transformers_js.pipeline synthesizer = await pipeline( 'text-to-speech', 'Xenova/speecht5_tts', { "quantized": False } ) speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'; async def synthesize(text): out = await synthesizer(text, { "speaker_embeddings": speaker_embeddings }); audio_data_memory_view = out["audio"] sampling_rate = out["sampling_rate"] audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32) audio_data_16bit = (audio_data * 32767).astype(np.int16) return sampling_rate, audio_data_16bit demo = gr.Interface(synthesize, "textbox", "audio") demo.launch()