freddyaboulton (HF staff) committed
Commit 1ac399b · 1 Parent(s): 1ed7130
Files changed (3)
  1. app.py +75 -0
  2. requirements.in +6 -0
  3. requirements.txt +328 -0
app.py ADDED
@@ -0,0 +1,75 @@
+ import gradio as gr
+ from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
+ import transformers
+ import numpy as np
+ from twilio.rest import Client
+ import os
+
+
+ pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True)
+
+
+ account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
+ auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
+
+ if account_sid and auth_token:
+     client = Client(account_sid, auth_token)
+
+     token = client.tokens.create()
+
+     rtc_configuration = {
+         "iceServers": token.ice_servers,
+         "iceTransportPolicy": "relay",
+     }
+ else:
+     rtc_configuration = None
+
+
+
+ def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict]):
+
+     output = pipe({"audio": audio[1], "turns": conversation, "sampling_rate": audio[0]},
+                   max_new_tokens=512)
+
+     conversation.append({"role": "user", "content": output["transcription"]})
+     conversation.append({"role": "assistant", "content": output["reply"]})
+
+     yield AdditionalOutputs(conversation)
+
+
+ with gr.Blocks() as demo:
+     gr.HTML(
+         """
+         <h1 style='text-align: center'>
+         Talk to Ultravox Llama 3.1 8b (Powered by WebRTC ⚡️)
+         </h1>
+         <p style='text-align: center'>
+         Once you grant access to your microphone, you can talk naturally to Ultravox.
+         When you stop talking, the audio will be sent for processing.
+         </p>
+         <p style='text-align: center'>
+         Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
+         </p>
+         """
+     )
+     transformers_convo = gr.State(value=[{
+         "role": "system",
+         "content": "You are a friendly and helpful character. You love to answer questions for people."
+     }])
+     with gr.Row():
+         with gr.Column():
+             audio = WebRTC(
+                 rtc_configuration=rtc_configuration,
+                 label="Stream",
+                 mode="send",
+                 modality="audio",
+             )
+         with gr.Column():
+             transcript = gr.Chatbot(label="transcript", type="messages")
+
+     audio.stream(ReplyOnPause(transcribe), inputs=[audio, transformers_convo, transcript], outputs=[audio], time_limit=90)
+     audio.on_additional_outputs(lambda s, a: (s, a), outputs=[transformers_convo, transcript],
+                                 queue=False, show_progress="hidden")
+
+ if __name__ == "__main__":
+     demo.launch()
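Note: the TURN setup in app.py can be checked on its own before launching the demo. The sketch below only mirrors the credential lookup above (it assumes the twilio package is installed; TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN are optional, exactly as in the app):

import os
from twilio.rest import Client

# Mirrors app.py: with credentials, fetch ICE servers from Twilio's token service;
# without them, the demo simply runs with rtc_configuration=None.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

if account_sid and auth_token:
    token = Client(account_sid, auth_token).tokens.create()
    # "relay" forces media through the returned TURN servers, as in app.py.
    print({"iceServers": token.ice_servers, "iceTransportPolicy": "relay"})
else:
    print("No Twilio credentials found; app.py falls back to rtc_configuration=None.")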
requirements.in ADDED
@@ -0,0 +1,6 @@
+ gradio_webrtc[vad]==0.0.12
+ numba==0.60.0
+ twilio
+ transformers
+ accelerate
+ peft
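Note: the header of the requirements.txt added below records that it was compiled from this requirements.in with uv. A minimal sketch of regenerating the pins after editing requirements.in (assumes uv is installed; the subprocess wrapper is only illustrative, the plain CLI command works the same):

import subprocess

# Re-run the compile command recorded at the top of requirements.txt.
subprocess.run(
    ["uv", "pip", "compile", "requirements.in", "-o", "requirements.txt"],
    check=True,
)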
requirements.txt ADDED
@@ -0,0 +1,328 @@
+ # This file was autogenerated by uv via the following command:
+ #    uv pip compile requirements.in -o requirements.txt
+ accelerate==1.1.1
+     # via
+     #   -r requirements.in
+     #   peft
+ aiofiles==23.2.1
+     # via gradio
+ aiohappyeyeballs==2.4.3
+     # via aiohttp
+ aiohttp==3.11.2
+     # via
+     #   aiohttp-retry
+     #   twilio
+ aiohttp-retry==2.8.3
+     # via twilio
+ aioice==0.9.0
+     # via aiortc
+ aiortc==1.9.0
+     # via gradio-webrtc
+ aiosignal==1.3.1
+     # via aiohttp
+ annotated-types==0.7.0
+     # via pydantic
+ anyio==4.6.2.post1
+     # via
+     #   gradio
+     #   httpx
+     #   starlette
+ attrs==24.2.0
+     # via aiohttp
+ audioread==3.0.1
+     # via librosa
+ av==12.3.0
+     # via aiortc
+ certifi==2024.8.30
+     # via
+     #   httpcore
+     #   httpx
+     #   requests
+ cffi==1.17.1
+     # via
+     #   aiortc
+     #   cryptography
+     #   pylibsrtp
+     #   soundfile
+ charset-normalizer==3.4.0
+     # via requests
+ click==8.1.7
+     # via
+     #   typer
+     #   uvicorn
+ coloredlogs==15.0.1
+     # via onnxruntime
+ cryptography==43.0.3
+     # via
+     #   aiortc
+     #   pyopenssl
+ decorator==5.1.1
+     # via librosa
+ dnspython==2.7.0
+     # via aioice
+ fastapi==0.115.5
+     # via gradio
+ ffmpy==0.4.0
+     # via gradio
+ filelock==3.16.1
+     # via
+     #   huggingface-hub
+     #   torch
+     #   transformers
+ flatbuffers==24.3.25
+     # via onnxruntime
+ frozenlist==1.5.0
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec==2024.10.0
+     # via
+     #   gradio-client
+     #   huggingface-hub
+     #   torch
+ google-crc32c==1.6.0
+     # via aiortc
+ gradio==5.5.0
+     # via gradio-webrtc
+ gradio-client==1.4.2
+     # via gradio
+ gradio-webrtc==0.0.12
+     # via -r requirements.in
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ httpcore==1.0.7
+     # via httpx
+ httpx==0.27.2
+     # via
+     #   gradio
+     #   gradio-client
+     #   safehttpx
+ huggingface-hub==0.26.2
+     # via
+     #   accelerate
+     #   gradio
+     #   gradio-client
+     #   peft
+     #   tokenizers
+     #   transformers
+ humanfriendly==10.0
+     # via coloredlogs
+ idna==3.10
+     # via
+     #   anyio
+     #   httpx
+     #   requests
+     #   yarl
+ ifaddr==0.2.0
+     # via aioice
+ jinja2==3.1.4
+     # via
+     #   gradio
+     #   torch
+ joblib==1.4.2
+     # via
+     #   librosa
+     #   scikit-learn
+ lazy-loader==0.4
+     # via librosa
+ librosa==0.10.2.post1
+     # via gradio-webrtc
+ llvmlite==0.43.0
+     # via numba
+ markdown-it-py==3.0.0
+     # via rich
+ markupsafe==2.1.5
+     # via
+     #   gradio
+     #   jinja2
+ mdurl==0.1.2
+     # via markdown-it-py
+ mpmath==1.3.0
+     # via sympy
+ msgpack==1.1.0
+     # via librosa
+ multidict==6.1.0
+     # via
+     #   aiohttp
+     #   yarl
+ networkx==3.4.2
+     # via torch
+ numba==0.60.0
+     # via
+     #   -r requirements.in
+     #   librosa
+ numpy==2.0.2
+     # via
+     #   accelerate
+     #   gradio
+     #   librosa
+     #   numba
+     #   onnxruntime
+     #   pandas
+     #   peft
+     #   scikit-learn
+     #   scipy
+     #   soxr
+     #   transformers
+ onnxruntime==1.20.0
+     # via gradio-webrtc
+ orjson==3.10.11
+     # via gradio
+ packaging==24.2
+     # via
+     #   accelerate
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   lazy-loader
+     #   onnxruntime
+     #   peft
+     #   pooch
+     #   transformers
+ pandas==2.2.3
+     # via gradio
+ peft==0.13.2
+     # via -r requirements.in
+ pillow==11.0.0
+     # via gradio
+ platformdirs==4.3.6
+     # via pooch
+ pooch==1.8.2
+     # via librosa
+ propcache==0.2.0
+     # via
+     #   aiohttp
+     #   yarl
+ protobuf==5.28.3
+     # via onnxruntime
+ psutil==6.1.0
+     # via
+     #   accelerate
+     #   peft
+ pycparser==2.22
+     # via cffi
+ pydantic==2.9.2
+     # via
+     #   fastapi
+     #   gradio
+ pydantic-core==2.23.4
+     # via pydantic
+ pydub==0.25.1
+     # via gradio
+ pyee==12.0.0
+     # via aiortc
+ pygments==2.18.0
+     # via rich
+ pyjwt==2.9.0
+     # via twilio
+ pylibsrtp==0.10.0
+     # via aiortc
+ pyopenssl==24.2.1
+     # via aiortc
+ python-dateutil==2.9.0.post0
+     # via pandas
+ python-multipart==0.0.12
+     # via gradio
+ pytz==2024.2
+     # via pandas
+ pyyaml==6.0.2
+     # via
+     #   accelerate
+     #   gradio
+     #   huggingface-hub
+     #   peft
+     #   transformers
+ regex==2024.11.6
+     # via transformers
+ requests==2.32.3
+     # via
+     #   huggingface-hub
+     #   pooch
+     #   transformers
+     #   twilio
+ rich==13.9.4
+     # via typer
+ ruff==0.7.4
+     # via gradio
+ safehttpx==0.1.1
+     # via gradio
+ safetensors==0.4.5
+     # via
+     #   accelerate
+     #   peft
+     #   transformers
+ scikit-learn==1.5.2
+     # via librosa
+ scipy==1.14.1
+     # via
+     #   librosa
+     #   scikit-learn
+ semantic-version==2.10.0
+     # via gradio
+ shellingham==1.5.4
+     # via typer
+ six==1.16.0
+     # via python-dateutil
+ sniffio==1.3.1
+     # via
+     #   anyio
+     #   httpx
+ soundfile==0.12.1
+     # via librosa
+ soxr==0.5.0.post1
+     # via librosa
+ starlette==0.41.2
+     # via
+     #   fastapi
+     #   gradio
+ sympy==1.13.1
+     # via
+     #   onnxruntime
+     #   torch
+ threadpoolctl==3.5.0
+     # via scikit-learn
+ tokenizers==0.20.3
+     # via transformers
+ tomlkit==0.12.0
+     # via gradio
+ torch==2.5.1
+     # via
+     #   accelerate
+     #   peft
+ tqdm==4.67.0
+     # via
+     #   huggingface-hub
+     #   peft
+     #   transformers
+ transformers==4.46.2
+     # via
+     #   -r requirements.in
+     #   peft
+ twilio==9.3.7
+     # via -r requirements.in
+ typer==0.13.0
+     # via gradio
+ typing-extensions==4.12.2
+     # via
+     #   fastapi
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   librosa
+     #   pydantic
+     #   pydantic-core
+     #   pyee
+     #   torch
+     #   typer
+ tzdata==2024.2
+     # via pandas
+ urllib3==2.2.3
+     # via requests
+ uvicorn==0.32.0
+     # via gradio
+ websockets==12.0
+     # via gradio-client
+ yarl==1.17.1
+     # via aiohttp