transformers_js_py

from transformers_js import pipeline
import gradio as gr
import numpy as np

synthesizer = await pipeline(
    'text-to-speech',
    'Xenova/mms-tts-ara',
    { "quantized": False }
)

async def synthesize(text):
    out = await synthesizer(text)
    audio_data_memory_view = out["audio"]
    sampling_rate = out["sampling_rate"]
    # Convert the float32 memoryview returned by the model into a NumPy array
    audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)
    # Scale to 16-bit PCM so gr.Audio can play it as (sampling_rate, data)
    audio_data_16bit = (audio_data * 32767).astype(np.int16)
    return sampling_rate, audio_data_16bit

demo = gr.Interface(synthesize, "textbox", "audio")
demo.launch()