Spaces:
Paused
Paused
Commit
·
449d4d5
1
Parent(s):
2d88e5d
try
Browse files
app.py
CHANGED
@@ -35,15 +35,17 @@ def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict], gradio_c
|
|
35 |
|
36 |
audio_sr = librosa.resample(audio[1].astype(np.float32) / 32768.0,
|
37 |
orig_sr=original_sr, target_sr=target_sr)
|
|
|
|
|
38 |
|
39 |
output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
|
40 |
max_new_tokens=512)
|
41 |
print("output", output)
|
42 |
|
43 |
-
conversation.append({"role": "user", "content":
|
44 |
-
conversation.append({"role": "assistant", "content": output
|
45 |
-
gradio_convo.append({"role": "user", "content":
|
46 |
-
gradio_convo.append({"role": "assistant", "content": output
|
47 |
|
48 |
yield AdditionalOutputs(conversation, gradio_convo)
|
49 |
|
|
|
35 |
|
36 |
audio_sr = librosa.resample(audio[1].astype(np.float32) / 32768.0,
|
37 |
orig_sr=original_sr, target_sr=target_sr)
|
38 |
+
inputs = pipe.preprocess({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr})
|
39 |
+
print("preprocess", inputs)
|
40 |
|
41 |
output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
|
42 |
max_new_tokens=512)
|
43 |
print("output", output)
|
44 |
|
45 |
+
conversation.append({"role": "user", "content": "hi"})
|
46 |
+
conversation.append({"role": "assistant", "content": output})
|
47 |
+
gradio_convo.append({"role": "user", "content": "hi"})
|
48 |
+
gradio_convo.append({"role": "assistant", "content": output})
|
49 |
|
50 |
yield AdditionalOutputs(conversation, gradio_convo)
|
51 |
|