maliahson committed
Commit bbe1a26 · verified · 1 Parent(s): 6f13335

Update app.py

Files changed (1):
  1. app.py +25 -8
app.py CHANGED
@@ -1,6 +1,7 @@
  import gradio as gr
  import torch
- from transformers import pipeline
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+ import librosa  # used to decode audio files into 16 kHz waveforms for the processor
  from jiwer import wer

  # Load models
@@ -15,12 +15,28 @@ whisper_pipeline_2 = pipeline(
      device=0 if torch.cuda.is_available() else "cpu"
  )

- whisper_pipeline_3 = pipeline(
-     "automatic-speech-recognition",
-     model="openai/whisper-medium",
-     device=0 if torch.cuda.is_available() else "cpu",
-     model_kwargs={"language": "<|ur|>"}
- )
+ # Set up openai/whisper-medium for Urdu transcription
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ model_3 = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-medium").to(device)
+ processor_3 = AutoProcessor.from_pretrained("openai/whisper-medium")
+
+ # Set forced decoder IDs for Urdu
+ language = "ur"  # language code for Urdu (get_decoder_prompt_ids expects "ur"/"urdu", not the raw "<|ur|>" token)
+ task = "transcribe"  # transcription task ("translate" would output English instead)
+ forced_decoder_ids = processor_3.get_decoder_prompt_ids(language=language, task=task)
+
+ def transcribe_with_whisper_medium(audio_path):
+     """
+     Transcribe audio using the openai/whisper-medium model with forced language settings for Urdu.
+     """
+     # The processor expects a raw waveform, not a file path, so decode and resample to 16 kHz first
+     audio, _ = librosa.load(audio_path, sr=16000)
+     inputs = processor_3(audio, sampling_rate=16000, return_tensors="pt")
+     with torch.no_grad():
+         outputs = model_3.generate(
+             inputs["input_features"].to(device), forced_decoder_ids=forced_decoder_ids
+         )
+     return processor_3.batch_decode(outputs, skip_special_tokens=True)[0]

  def transcribe_and_compare(audio_path, original_transcription=None):
      """
@@ -35,7 +52,7 @@ def transcribe_and_compare(audio_path, original_transcription=None):
      # Transcriptions from all three models
      transcription_1 = whisper_pipeline_1(audio_path)["text"]
      transcription_2 = whisper_pipeline_2(audio_path)["text"]
-     transcription_3 = whisper_pipeline_3(audio_path)["text"]
+     transcription_3 = transcribe_with_whisper_medium(audio_path)

      # Prepare comparison results
      comparison_result = {
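
Note on the change: in transformers, model_kwargs is forwarded to from_pretrained, so the removed model_kwargs={"language": "<|ur|>"} never reached generation, which is presumably why this commit switches to explicit forced decoder IDs. On recent transformers versions the same Urdu forcing can also stay inside the pipeline API by forwarding generate_kwargs instead. A minimal sketch of that alternative (not the committed code; the pipeline name and sample file below are placeholders):

import torch
from transformers import pipeline

# Sketch: the ASR pipeline forwards generate_kwargs to model.generate,
# so language and task can be forced without building forced_decoder_ids by hand.
whisper_urdu = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    device=0 if torch.cuda.is_available() else "cpu",
    generate_kwargs={"language": "urdu", "task": "transcribe"},
)

print(whisper_urdu("sample_ur.wav")["text"])  # "sample_ur.wav" is a hypothetical test file

The manual AutoModelForSpeechSeq2Seq route in the commit trades this convenience for explicit control over decoding (torch.no_grad, batch_decode), while the pipeline route handles audio file decoding and long-audio chunking itself; which is available depends on the installed transformers version.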