Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from transformers import
|
4 |
from jiwer import wer
|
5 |
|
6 |
# Load models
|
@@ -16,24 +16,22 @@ whisper_pipeline_2 = pipeline(
|
|
16 |
)
|
17 |
|
18 |
# Set up openai/whisper-medium for Urdu transcription
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
# Set forced decoder IDs for Urdu
|
23 |
-
language = "<|ur|>" # Urdu language token
|
24 |
-
task = "<|transcribe|>" # Transcription task token
|
25 |
-
forced_decoder_ids = processor_3.get_decoder_prompt_ids(language=language, task=task)
|
26 |
|
27 |
def transcribe_with_whisper_medium(audio_path):
|
28 |
"""
|
29 |
Transcribe audio using the openai/whisper-medium model with forced language settings for Urdu.
|
30 |
"""
|
31 |
-
inputs =
|
32 |
with torch.no_grad():
|
33 |
-
|
|
|
34 |
inputs["input_features"], forced_decoder_ids=forced_decoder_ids
|
35 |
)
|
36 |
-
|
|
|
37 |
|
38 |
def transcribe_and_compare(audio_path, original_transcription=None):
|
39 |
"""
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
|
4 |
from jiwer import wer
|
5 |
|
6 |
# Load models
|
|
|
16 |
)
|
17 |
|
18 |
# Set up openai/whisper-medium for Urdu transcription
|
19 |
+
processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
20 |
+
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
21 |
+
forced_decoder_ids = processor.get_decoder_prompt_ids(language="urdu", task="transcribe")
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def transcribe_with_whisper_medium(audio_path):
    """
    Transcribe audio using the openai/whisper-medium model with forced language settings for Urdu.

    Args:
        audio_path: Path to an audio file on disk.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    # Local import: keeps the new dependency inside this function. It comes
    # from `transformers`, which this file already imports.
    from transformers.pipelines.audio_utils import ffmpeg_read

    sampling_rate = 16000

    # The processor expects a raw waveform (array of floats), not a file
    # path, so decode the file to mono float32 at the target rate first.
    with open(audio_path, "rb") as audio_file:
        waveform = ffmpeg_read(audio_file.read(), sampling_rate)

    inputs = processor(waveform, return_tensors="pt", sampling_rate=sampling_rate)
    with torch.no_grad():
        # Generate the transcription using the forced decoder IDs for Urdu
        outputs = model.generate(
            inputs["input_features"], forced_decoder_ids=forced_decoder_ids
        )
    # Decode the outputs to text
    return processor.batch_decode(outputs, skip_special_tokens=True)[0]
|
35 |
|
36 |
def transcribe_and_compare(audio_path, original_transcription=None):
|
37 |
"""
|