maliahson committed on
Commit
1cdc427
·
verified ·
1 Parent(s): bbe1a26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -11
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
4
  from jiwer import wer
5
 
6
  # Load models
@@ -16,24 +16,22 @@ whisper_pipeline_2 = pipeline(
16
  )
17
 
18
  # Set up openai/whisper-medium for Urdu transcription
19
- model_3 = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-medium")
20
- processor_3 = AutoProcessor.from_pretrained("openai/whisper-medium")
21
-
22
- # Set forced decoder IDs for Urdu
23
- language = "<|ur|>" # Urdu language token
24
- task = "<|transcribe|>" # Transcription task token
25
- forced_decoder_ids = processor_3.get_decoder_prompt_ids(language=language, task=task)
26
 
27
  def transcribe_with_whisper_medium(audio_path):
28
  """
29
  Transcribe audio using the openai/whisper-medium model with forced language settings for Urdu.
30
  """
31
- inputs = processor_3(audio_path, return_tensors="pt", sampling_rate=16000)
32
  with torch.no_grad():
33
- outputs = model_3.generate(
 
34
  inputs["input_features"], forced_decoder_ids=forced_decoder_ids
35
  )
36
- return processor_3.batch_decode(outputs, skip_special_tokens=True)[0]
 
37
 
38
  def transcribe_and_compare(audio_path, original_transcription=None):
39
  """
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
4
  from jiwer import wer
5
 
6
  # Load models
 
16
  )
17
 
18
  # Set up openai/whisper-medium for Urdu transcription
19
+ processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
20
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
21
+ forced_decoder_ids = processor.get_decoder_prompt_ids(language="urdu", task="transcribe")
 
 
 
 
22
 
23
  def transcribe_with_whisper_medium(audio_path):
24
  """
25
  Transcribe audio using the openai/whisper-medium model with forced language settings for Urdu.
26
  """
27
+ inputs = processor(audio_path, return_tensors="pt", sampling_rate=16000)
28
  with torch.no_grad():
29
+ # Generate the transcription using the forced decoder IDs for Urdu
30
+ outputs = model.generate(
31
  inputs["input_features"], forced_decoder_ids=forced_decoder_ids
32
  )
33
+ # Decode the outputs to text
34
+ return processor.batch_decode(outputs, skip_special_tokens=True)[0]
35
 
36
  def transcribe_and_compare(audio_path, original_transcription=None):
37
  """