# Requirements: transformers_js_py
from transformers_js import pipeline
import gradio as gr
import numpy as np
synthesizer = await pipeline(
'text-to-speech',
'Xenova/mms-tts-ara',
{ "quantized": False }
)
async def synthesize(text):
    """Synthesize speech for *text* and return it as 16-bit PCM audio.

    Args:
        text: The text handed to the module-level ``synthesizer`` pipeline.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``sampling_rate`` is the
        value reported by the pipeline and ``samples`` is a one-dimensional
        ``numpy.int16`` array.
    """
    out = await synthesizer(text)
    sampling_rate = out["sampling_rate"]

    # Reinterpret the raw output buffer as 32-bit floats.
    # NOTE(review): assumes the pipeline emits float32 samples nominally in
    # [-1.0, 1.0] — confirm against the model output.
    audio_data = np.frombuffer(out["audio"], dtype=np.float32)

    # Saturate before scaling: a sample outside [-1.0, 1.0] would otherwise
    # overflow the int16 cast and wrap around, producing loud clicks.
    audio_data = np.clip(audio_data, -1.0, 1.0)
    audio_data_16bit = (audio_data * 32767).astype(np.int16)
    return sampling_rate, audio_data_16bit
# Expose `synthesize` through a minimal UI: a text box in, an audio output out.
demo = gr.Interface(fn=synthesize, inputs="textbox", outputs="audio")
demo.launch()