freddyaboulton (HF staff) committed
Commit 1ac399b · 1 Parent(s): 1ed7130
Files changed (3)
  1. app.py +75 -0
  2. requirements.in +6 -0
  3. requirements.txt +328 -0
app.py ADDED
@@ -0,0 +1,75 @@
+ import gradio as gr
+ from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
+ import transformers
+ import numpy as np
+ from twilio.rest import Client
+ import os
+
+
+ pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True)
+
+
+ account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
+ auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
+
+ if account_sid and auth_token:
+     client = Client(account_sid, auth_token)
+
+     token = client.tokens.create()
+
+     rtc_configuration = {
+         "iceServers": token.ice_servers,
+         "iceTransportPolicy": "relay",
+     }
+ else:
+     rtc_configuration = None
+
+
+
+ def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict]):
+
+     output = pipe({"audio": audio[1], "turns": conversation, "sampling_rate": audio[0]},
+                   max_new_tokens=512)
+
+     conversation.append({"role": "user", "content": output["transcription"]})
+     conversation.append({"role": "assistant", "content": output["reply"]})
+
+     yield AdditionalOutputs(conversation)
+
+
+ with gr.Blocks() as demo:
+     gr.HTML(
+         """
+         <h1 style='text-align: center'>
+         Talk to Ultravox Llama 3.1 8b (Powered by WebRTC ⚡️)
+         </h1>
+         <p style='text-align: center'>
+         Once you grant access to your microphone, you can talk naturally to Ultravox.
+         When you stop talking, the audio will be sent for processing.
+         </p>
+         <p style='text-align: center'>
+         Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
+         </p>
+         """
+     )
+     transformers_convo = gr.State(value=[{
+         "role": "system",
+         "content": "You are a friendly and helpful character. You love to answer questions for people."
+     }])
+     with gr.Row():
+         with gr.Column():
+             audio = WebRTC(
+                 rtc_configuration=rtc_configuration,
+                 label="Stream",
+                 mode="send",
+                 modality="audio",
+             )
+         with gr.Column():
+             transcript = gr.Chatbot(label="transcript", type="messages")
+
+     audio.stream(ReplyOnPause(transcribe), inputs=[audio, transformers_convo, transcript], outputs=[audio], time_limit=90)
+     audio.on_additional_outputs(lambda s, a: (s, a), outputs=[transformers_convo, transcript],
+                                 queue=False, show_progress="hidden")
+
+ if __name__ == "__main__":
+     demo.launch()
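Note: the TURN setup in app.py can be checked on its own before launching the demo. The sketch below only mirrors the credential lookup above (it assumes the twilio package is installed; TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN are optional, exactly as in the app):

import os
from twilio.rest import Client

# Mirrors app.py: with credentials, fetch ICE servers from Twilio's token service;
# without them, the demo simply runs with rtc_configuration=None.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

if account_sid and auth_token:
    token = Client(account_sid, auth_token).tokens.create()
    # "relay" forces media through the returned TURN servers, as in app.py.
    print({"iceServers": token.ice_servers, "iceTransportPolicy": "relay"})
else:
    print("No Twilio credentials found; app.py falls back to rtc_configuration=None.")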
requirements.in ADDED
@@ -0,0 +1,6 @@
+ gradio_webrtc[vad]==0.0.12
+ numba==0.60.0
+ twilio
+ transformers
+ accelerate
+ peft
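Note: the header of the requirements.txt added below records that it was compiled from this requirements.in with uv. A minimal sketch of regenerating the pins after editing requirements.in (assumes uv is installed; the subprocess wrapper is only illustrative, the plain CLI command works the same):

import subprocess

# Re-run the compile command recorded at the top of requirements.txt.
subprocess.run(
    ["uv", "pip", "compile", "requirements.in", "-o", "requirements.txt"],
    check=True,
)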
requirements.txt ADDED
@@ -0,0 +1,328 @@
+ # This file was autogenerated by uv via the following command:
+ #    uv pip compile requirements.in -o requirements.txt
+ accelerate==1.1.1
+     # via
+     #   -r requirements.in
+     #   peft
+ aiofiles==23.2.1
+     # via gradio
+ aiohappyeyeballs==2.4.3
+     # via aiohttp
+ aiohttp==3.11.2
+     # via
+     #   aiohttp-retry
+     #   twilio
+ aiohttp-retry==2.8.3
+     # via twilio
+ aioice==0.9.0
+     # via aiortc
+ aiortc==1.9.0
+     # via gradio-webrtc
+ aiosignal==1.3.1
+     # via aiohttp
+ annotated-types==0.7.0
+     # via pydantic
+ anyio==4.6.2.post1
+     # via
+     #   gradio
+     #   httpx
+     #   starlette
+ attrs==24.2.0
+     # via aiohttp
+ audioread==3.0.1
+     # via librosa
+ av==12.3.0
+     # via aiortc
+ certifi==2024.8.30
+     # via
+     #   httpcore
+     #   httpx
+     #   requests
+ cffi==1.17.1
+     # via
+     #   aiortc
+     #   cryptography
+     #   pylibsrtp
+     #   soundfile
+ charset-normalizer==3.4.0
+     # via requests
+ click==8.1.7
+     # via
+     #   typer
+     #   uvicorn
+ coloredlogs==15.0.1
+     # via onnxruntime
+ cryptography==43.0.3
+     # via
+     #   aiortc
+     #   pyopenssl
+ decorator==5.1.1
+     # via librosa
+ dnspython==2.7.0
+     # via aioice
+ fastapi==0.115.5
+     # via gradio
+ ffmpy==0.4.0
+     # via gradio
+ filelock==3.16.1
+     # via
+     #   huggingface-hub
+     #   torch
+     #   transformers
+ flatbuffers==24.3.25
+     # via onnxruntime
+ frozenlist==1.5.0
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec==2024.10.0
+     # via
+     #   gradio-client
+     #   huggingface-hub
+     #   torch
+ google-crc32c==1.6.0
+     # via aiortc
+ gradio==5.5.0
+     # via gradio-webrtc
+ gradio-client==1.4.2
+     # via gradio
+ gradio-webrtc==0.0.12
+     # via -r requirements.in
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ httpcore==1.0.7
+     # via httpx
+ httpx==0.27.2
+     # via
+     #   gradio
+     #   gradio-client
+     #   safehttpx
+ huggingface-hub==0.26.2
+     # via
+     #   accelerate
+     #   gradio
+     #   gradio-client
+     #   peft
+     #   tokenizers
+     #   transformers
+ humanfriendly==10.0
+     # via coloredlogs
+ idna==3.10
+     # via
+     #   anyio
+     #   httpx
+     #   requests
+     #   yarl
+ ifaddr==0.2.0
+     # via aioice
+ jinja2==3.1.4
+     # via
+     #   gradio
+     #   torch
+ joblib==1.4.2
+     # via
+     #   librosa
+     #   scikit-learn
+ lazy-loader==0.4
+     # via librosa
+ librosa==0.10.2.post1
+     # via gradio-webrtc
+ llvmlite==0.43.0
+     # via numba
+ markdown-it-py==3.0.0
+     # via rich
+ markupsafe==2.1.5
+     # via
+     #   gradio
+     #   jinja2
+ mdurl==0.1.2
+     # via markdown-it-py
+ mpmath==1.3.0
+     # via sympy
+ msgpack==1.1.0
+     # via librosa
+ multidict==6.1.0
+     # via
+     #   aiohttp
+     #   yarl
+ networkx==3.4.2
+     # via torch
+ numba==0.60.0
+     # via
+     #   -r requirements.in
+     #   librosa
+ numpy==2.0.2
+     # via
+     #   accelerate
+     #   gradio
+     #   librosa
+     #   numba
+     #   onnxruntime
+     #   pandas
+     #   peft
+     #   scikit-learn
+     #   scipy
+     #   soxr
+     #   transformers
+ onnxruntime==1.20.0
+     # via gradio-webrtc
+ orjson==3.10.11
+     # via gradio
+ packaging==24.2
+     # via
+     #   accelerate
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   lazy-loader
+     #   onnxruntime
+     #   peft
+     #   pooch
+     #   transformers
+ pandas==2.2.3
+     # via gradio
+ peft==0.13.2
+     # via -r requirements.in
+ pillow==11.0.0
+     # via gradio
+ platformdirs==4.3.6
+     # via pooch
+ pooch==1.8.2
+     # via librosa
+ propcache==0.2.0
+     # via
+     #   aiohttp
+     #   yarl
+ protobuf==5.28.3
+     # via onnxruntime
+ psutil==6.1.0
+     # via
+     #   accelerate
+     #   peft
+ pycparser==2.22
+     # via cffi
+ pydantic==2.9.2
+     # via
+     #   fastapi
+     #   gradio
+ pydantic-core==2.23.4
+     # via pydantic
+ pydub==0.25.1
+     # via gradio
+ pyee==12.0.0
+     # via aiortc
+ pygments==2.18.0
+     # via rich
+ pyjwt==2.9.0
+     # via twilio
+ pylibsrtp==0.10.0
+     # via aiortc
+ pyopenssl==24.2.1
+     # via aiortc
+ python-dateutil==2.9.0.post0
+     # via pandas
+ python-multipart==0.0.12
+     # via gradio
+ pytz==2024.2
+     # via pandas
+ pyyaml==6.0.2
+     # via
+     #   accelerate
+     #   gradio
+     #   huggingface-hub
+     #   peft
+     #   transformers
+ regex==2024.11.6
+     # via transformers
+ requests==2.32.3
+     # via
+     #   huggingface-hub
+     #   pooch
+     #   transformers
+     #   twilio
+ rich==13.9.4
+     # via typer
+ ruff==0.7.4
+     # via gradio
+ safehttpx==0.1.1
+     # via gradio
+ safetensors==0.4.5
+     # via
+     #   accelerate
+     #   peft
+     #   transformers
+ scikit-learn==1.5.2
+     # via librosa
+ scipy==1.14.1
+     # via
+     #   librosa
+     #   scikit-learn
+ semantic-version==2.10.0
+     # via gradio
+ shellingham==1.5.4
+     # via typer
+ six==1.16.0
+     # via python-dateutil
+ sniffio==1.3.1
+     # via
+     #   anyio
+     #   httpx
+ soundfile==0.12.1
+     # via librosa
+ soxr==0.5.0.post1
+     # via librosa
+ starlette==0.41.2
+     # via
+     #   fastapi
+     #   gradio
+ sympy==1.13.1
+     # via
+     #   onnxruntime
+     #   torch
+ threadpoolctl==3.5.0
+     # via scikit-learn
+ tokenizers==0.20.3
+     # via transformers
+ tomlkit==0.12.0
+     # via gradio
+ torch==2.5.1
+     # via
+     #   accelerate
+     #   peft
+ tqdm==4.67.0
+     # via
+     #   huggingface-hub
+     #   peft
+     #   transformers
+ transformers==4.46.2
+     # via
+     #   -r requirements.in
+     #   peft
+ twilio==9.3.7
+     # via -r requirements.in
+ typer==0.13.0
+     # via gradio
+ typing-extensions==4.12.2
+     # via
+     #   fastapi
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   librosa
+     #   pydantic
+     #   pydantic-core
+     #   pyee
+     #   torch
+     #   typer
+ tzdata==2024.2
+     # via pandas
+ urllib3==2.2.3
+     # via requests
+ uvicorn==0.32.0
+     # via gradio
+ websockets==12.0
+     # via gradio-client
+ yarl==1.17.1
+     # via aiohttp