Update app.py
app.py CHANGED

@@ -5,6 +5,7 @@ from typing import Optional
 import spaces
 import torch
 import gradio as gr
+import numpy as np
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 
@@ -60,7 +61,10 @@ def transcribe(inputs: str):
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
     with open(inputs, "rb") as f:
         inputs = f.read()
-
+    inputs = ffmpeg_read(inputs, sampling_rate)
+    array_pad = np.zeros(int(pipe.feature_extractor.sampling_rate * 0.5))
+    inputs = np.concatenate([array_pad, inputs, array_pad])
+    prediction = get_prediction({"array": inputs, "sampling_rate": sampling_rate})
     output = ""
     for n, s in enumerate(prediction["speakers"]):
         text_timestamped = "\n".join([f"- **{format_time(*c['timestamp'])}** {c['text']}" for c in prediction[f"chunks/{s}"]])
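For context, a minimal sketch of what this change does inside transcribe(): decode the uploaded file with ffmpeg_read, pad half a second of silence on each side, and hand the result to the pipeline. The names pipe, sampling_rate, and get_prediction are defined elsewhere in app.py and are only assumed here; the pipeline model and the prepare_audio helper are illustrative placeholders, not taken from this diff.

import numpy as np
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

# Assumed setup: app.py builds an ASR pipeline somewhere above transcribe().
pipe = pipeline("automatic-speech-recognition")        # placeholder model choice
sampling_rate = pipe.feature_extractor.sampling_rate   # e.g. 16 000 Hz

def prepare_audio(path: str) -> dict:
    # Decode the uploaded file to a float32 waveform at the model's sampling rate.
    with open(path, "rb") as f:
        raw = f.read()
    audio = ffmpeg_read(raw, sampling_rate)
    # Pad 0.5 s of silence on both ends so speech at the very start or end
    # of the recording is not clipped by the model.
    pad = np.zeros(int(sampling_rate * 0.5))
    return {"array": np.concatenate([pad, audio, pad]), "sampling_rate": sampling_rate}

# In app.py the padded dict is then passed to the existing helper:
# prediction = get_prediction(prepare_audio(uploaded_path))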