Spaces:
Paused
Paused
Commit
·
b2051b3
1
Parent(s):
7f9d568
add code
Browse files
app.py
CHANGED
@@ -4,9 +4,12 @@ import transformers
|
|
4 |
import numpy as np
|
5 |
from twilio.rest import Client
|
6 |
import os
|
|
|
|
|
7 |
|
8 |
|
9 |
-
pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True
|
|
|
10 |
|
11 |
|
12 |
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
@@ -27,10 +30,13 @@ else:
|
|
27 |
|
28 |
|
29 |
def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict], gradio_convo: list[dict]):
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
output = pipe({"audio":
|
32 |
-
max_new_tokens=512
|
33 |
-
device=0)
|
34 |
print("output", output)
|
35 |
|
36 |
conversation.append({"role": "user", "content": output["transcription"]})
|
|
|
4 |
import numpy as np
|
5 |
from twilio.rest import Client
|
6 |
import os
|
7 |
+
import torch
|
8 |
+
import librosa
|
9 |
|
10 |
|
11 |
+
pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True,
|
12 |
+
device=torch.device('cuda'))
|
13 |
|
14 |
|
15 |
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
|
|
30 |
|
31 |
|
32 |
def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict], gradio_convo: list[dict]):
|
33 |
+
original_sr = audio[0]
|
34 |
+
target_sr = 16000
|
35 |
+
|
36 |
+
audio_sr = librosa.resample(audio[1], orig_sr=original_sr, target_sr=target_sr)
|
37 |
|
38 |
+
output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
|
39 |
+
max_new_tokens=512)
|
|
|
40 |
print("output", output)
|
41 |
|
42 |
conversation.append({"role": "user", "content": output["transcription"]})
|