talk-to-ultravox

Running on L40S

App Files Files Community

talk-to-ultravox / app.py

freddyaboulton HF Staff

code

1ac399b 8 months ago

raw

history blame

2.4 kB

	import gradio as gr
	from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
	import transformers
	import numpy as np
	from twilio.rest import Client
	import os


	# Build the Ultravox pipeline once at import time; `transcribe` reuses this
	# single module-level instance for every request.
	# NOTE(review): trust_remote_code=True lets transformers run Python code that
	# ships with the model repository -- confirm the repo is trusted/pinned.
	pipe = transformers.pipeline(
	    model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b',
	    trust_remote_code=True,
	)


	# Twilio credentials are optional and read from the environment.
	account_sid = os.getenv("TWILIO_ACCOUNT_SID")
	auth_token = os.getenv("TWILIO_AUTH_TOKEN")

	# Without both credentials no custom RTC configuration is passed to WebRTC.
	rtc_configuration = None

	if account_sid and auth_token:
	    # Request a Twilio token and use the ICE servers it carries, with the
	    # transport policy fixed to "relay".
	    client = Client(account_sid, auth_token)
	    token = client.tokens.create()
	    rtc_configuration = {
	        "iceServers": token.ice_servers,
	        "iceTransportPolicy": "relay",
	    }



	def transcribe(
	    audio: tuple[int, np.ndarray],
	    conversation: list[dict],
	    gradio_convo: "list[dict] | None" = None,
	):
	    """Run Ultravox on one utterance and publish the updated histories.

	    The stream event in this file passes three inputs (the audio, the model
	    conversation state and the chatbot transcript) and its
	    ``on_additional_outputs`` callback expects two values back, so this
	    handler accepts the transcript as an optional third argument and, when
	    it is given, emits both histories.

	    Args:
	        audio: ``(sampling_rate, samples)`` pair for the utterance.
	        conversation: Message history handed to the pipeline as ``turns``.
	            The new user and assistant messages are appended in place.
	        gradio_convo: Optional message history shown in the transcript
	            chatbot. When supplied, the same two messages are appended in
	            place and it is emitted as a second additional output.

	    Yields:
	        ``AdditionalOutputs(conversation, gradio_convo)`` when
	        ``gradio_convo`` is supplied, otherwise
	        ``AdditionalOutputs(conversation)``.
	    """
	    sampling_rate, samples = audio

	    # NOTE(review): assumes the pipeline result is a mapping exposing
	    # "transcription" and "reply" -- confirm against the model's pipeline.
	    output = pipe(
	        {"audio": samples, "turns": conversation, "sampling_rate": sampling_rate},
	        max_new_tokens=512,
	    )

	    new_messages = [
	        {"role": "user", "content": output["transcription"]},
	        {"role": "assistant", "content": output["reply"]},
	    ]
	    conversation.extend(new_messages)

	    if gradio_convo is None:
	        # Two-argument call: keep the original single-output behaviour.
	        yield AdditionalOutputs(conversation)
	    else:
	        # Copy the dicts so the two histories never share message objects.
	        gradio_convo.extend(dict(message) for message in new_messages)
	        yield AdditionalOutputs(conversation, gradio_convo)


	with gr.Blocks() as demo:
	    # Page header and usage instructions.
	    gr.HTML(
	        """
	<h1 style='text-align: center'>
	Talk to Ultravox Llama 3.1 8b (Powered by WebRTC ⚡️)
	</h1>
	<p style='text-align: center'>
	Once you grant access to your microphone, you can talk naturally to Ultravox.
	When you stop talking, the audio will be sent for processing.
	</p>
	<p style='text-align: center'>
	Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
	</p>
	"""
	    )

	    # Model-side conversation state, seeded with the system prompt.
	    transformers_convo = gr.State(
	        value=[
	            {
	                "role": "system",
	                "content": "You are a friendly and helpful character. You love to answer questions for people.",
	            }
	        ]
	    )

	    # Two columns: the audio stream on the left, the transcript on the right.
	    with gr.Row():
	        with gr.Column():
	            audio = WebRTC(
	                label="Stream",
	                modality="audio",
	                mode="send",
	                rtc_configuration=rtc_configuration,
	            )
	        with gr.Column():
	            transcript = gr.Chatbot(type="messages", label="transcript")

	    # Route the stream through `transcribe` wrapped in ReplyOnPause; the
	    # 90-second limit matches the notice shown in the header.
	    audio.stream(
	        ReplyOnPause(transcribe),
	        inputs=[audio, transformers_convo, transcript],
	        outputs=[audio],
	        time_limit=90,
	    )

	    # Forward the handler's additional outputs unchanged into the state and
	    # the chatbot, bypassing the queue and hiding the progress indicator.
	    audio.on_additional_outputs(
	        lambda model_turns, chat_turns: (model_turns, chat_turns),
	        outputs=[transformers_convo, transcript],
	        queue=False,
	        show_progress="hidden",
	    )

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()