freddyaboulton (HF staff) committed
Commit b2051b3
1 Parent(s): 7f9d568
Files changed (1)
  1. app.py +10 -4
app.py CHANGED
@@ -4,9 +4,12 @@ import transformers
 import numpy as np
 from twilio.rest import Client
 import os
+import torch
+import librosa


-pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True)
+pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True,
+                             device=torch.device('cuda'))


 account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
@@ -27,10 +30,13 @@ else:


 def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict], gradio_convo: list[dict]):
+    original_sr = audio[0]
+    target_sr = 16000
+
+    audio_sr = librosa.resample(audio[1], orig_sr=original_sr, target_sr=target_sr)

-    output = pipe({"audio": audio[1], "turns": conversation, "sampling_rate": audio[0]},
-                  max_new_tokens=512,
-                  device=0)
+    output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
+                  max_new_tokens=512)
     print("output", output)

     conversation.append({"role": "user", "content": output["transcription"]})
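
For reference, a minimal sketch of the resampling step this commit adds, assuming Gradio-style `(sample_rate, np.ndarray)` audio input. The `prepare_audio` helper and the int16-to-float conversion are illustrative assumptions, not part of the committed code; librosa.resample expects floating-point samples, while Gradio microphone input is commonly int16 PCM.

```python
import numpy as np
import librosa


def prepare_audio(audio: tuple[int, np.ndarray], target_sr: int = 16000) -> np.ndarray:
    """Resample a (sample_rate, samples) tuple to target_sr, as the commit does before calling the pipeline."""
    original_sr, samples = audio
    # Assumption: convert integer PCM to float in [-1, 1] before resampling.
    if np.issubdtype(samples.dtype, np.integer):
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    # Resample to the 16 kHz rate the commit passes to the Ultravox pipeline.
    return librosa.resample(samples, orig_sr=original_sr, target_sr=target_sr)


# Example: a 1-second 440 Hz tone at 48 kHz becomes a 16 000-sample array.
tone = np.sin(2 * np.pi * 440 * np.linspace(0, 1, 48000, endpoint=False)).astype(np.float32)
resampled = prepare_audio((48000, tone))
print(resampled.shape)  # (16000,)
```

The resampled array would then be passed to the pipeline exactly as in the diff above, i.e. `pipe({"audio": resampled, "turns": conversation, "sampling_rate": 16000}, max_new_tokens=512)`.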