Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from transformers import pipeline
|
4 |
from jiwer import wer
|
5 |
|
6 |
# Load models
|
@@ -15,12 +15,25 @@ whisper_pipeline_2 = pipeline(
|
|
15 |
device=0 if torch.cuda.is_available() else "cpu"
|
16 |
)
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def transcribe_and_compare(audio_path, original_transcription=None):
|
26 |
"""
|
@@ -35,7 +48,7 @@ def transcribe_and_compare(audio_path, original_transcription=None):
|
|
35 |
# Transcriptions from all three models
|
36 |
transcription_1 = whisper_pipeline_1(audio_path)["text"]
|
37 |
transcription_2 = whisper_pipeline_2(audio_path)["text"]
|
38 |
-
transcription_3 =
|
39 |
|
40 |
# Prepare comparison results
|
41 |
comparison_result = {
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
4 |
from jiwer import wer
|
5 |
|
6 |
# Load models
|
|
|
15 |
device=0 if torch.cuda.is_available() else "cpu"
|
16 |
)
|
17 |
|
18 |
+
# Set up openai/whisper-medium for Urdu transcription.
# The model and processor are loaded once at import time so every request
# reuses the same weights.
model_3 = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-medium")
processor_3 = AutoProcessor.from_pretrained("openai/whisper-medium")

# Set forced decoder IDs for Urdu.
# get_decoder_prompt_ids expects a plain language name/code and a plain task
# name ("transcribe" or "translate"); the processor maps them to the
# corresponding special tokens itself. Passing the raw "<|ur|>" /
# "<|transcribe|>" token strings is rejected as an unsupported language/task.
language = "urdu"  # Urdu language
task = "transcribe"  # Transcription (not translation) task
forced_decoder_ids = processor_3.get_decoder_prompt_ids(language=language, task=task)
|
26 |
+
|
27 |
+
def transcribe_with_whisper_medium(audio_path):
    """
    Transcribe audio using the openai/whisper-medium model with forced language settings for Urdu.

    Args:
        audio_path: Path to an audio file on disk.

    Returns:
        The decoded transcription text for the clip, with special tokens removed.
    """
    # Imported locally so this block does not depend on the file's import header;
    # it ships with transformers, which the file already requires.
    from transformers.pipelines.audio_utils import ffmpeg_read

    sampling_rate = 16000

    # The processor needs the raw waveform, not the file path: decode the file
    # to a mono float array resampled to the rate announced to the processor.
    with open(audio_path, "rb") as audio_file:
        waveform = ffmpeg_read(audio_file.read(), sampling_rate)

    # NOTE(review): the feature extractor presumably pads/truncates to Whisper's
    # fixed input window, so very long clips may be cut short -- confirm.
    inputs = processor_3(waveform, return_tensors="pt", sampling_rate=sampling_rate)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model_3.generate(
            inputs["input_features"], forced_decoder_ids=forced_decoder_ids
        )
    return processor_3.batch_decode(outputs, skip_special_tokens=True)[0]
|
37 |
|
38 |
def transcribe_and_compare(audio_path, original_transcription=None):
|
39 |
"""
|
|
|
48 |
# Transcriptions from all three models
|
49 |
transcription_1 = whisper_pipeline_1(audio_path)["text"]
|
50 |
transcription_2 = whisper_pipeline_2(audio_path)["text"]
|
51 |
+
transcription_3 = transcribe_with_whisper_medium(audio_path)
|
52 |
|
53 |
# Prepare comparison results
|
54 |
comparison_result = {
|