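# Gradio app: transcribe an uploaded or recorded audio clip with three Whisper
# models and compare their outputs, optionally scoring each against a
# ground-truth transcription via word error rate (WER).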
import gradio as gr
import librosa  # needed to load audio into a raw waveform for the bare Whisper model
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
from jiwer import wer
# Load models
whisper_pipeline_1 = pipeline(
    "automatic-speech-recognition",
    model="maliahson/Finetuned_Whisper_Medium_Model_2"
)
whisper_pipeline_2 = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    device=0 if torch.cuda.is_available() else "cpu"
)
# Set up openai/whisper-medium for Urdu transcription
processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
forced_decoder_ids = processor.get_decoder_prompt_ids(language="urdu", task="transcribe")
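# Note: recent transformers releases deprecate forced_decoder_ids in favor of
# model.generate(..., language="urdu", task="transcribe"); the forced-IDs call
# below still works on versions where get_decoder_prompt_ids is available.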
def transcribe_with_whisper_medium(audio_path):
    """
    Transcribe audio using the openai/whisper-medium model with forced
    language settings for Urdu.
    """
    # The processor expects a raw waveform, not a file path, so load the
    # audio at the 16 kHz sampling rate Whisper expects.
    speech, _ = librosa.load(audio_path, sr=16000)
    inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        # Generate the transcription using the forced decoder IDs for Urdu
        outputs = model.generate(
            inputs["input_features"], forced_decoder_ids=forced_decoder_ids
        )
    # Decode the output token IDs to text
    return processor.batch_decode(outputs, skip_special_tokens=True)[0]
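# Example usage (hypothetical file path):
#     urdu_text = transcribe_with_whisper_medium("clip_16k.wav")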
def transcribe_and_compare(audio_path, original_transcription=None):
    """
    Transcribes an audio file using three Whisper models and compares results.

    Args:
        audio_path (str): Path to the audio file.
        original_transcription (str, optional): Ground-truth transcription.

    Returns:
        dict: Results including transcriptions and WER calculations.
    """
    # Transcriptions from all three models
    transcription_1 = whisper_pipeline_1(audio_path)["text"]
    transcription_2 = whisper_pipeline_2(audio_path)["text"]
    transcription_3 = transcribe_with_whisper_medium(audio_path)
    # Prepare comparison results
    comparison_result = {
        "Model 1 Output (maliahson/Finetuned_Whisper_Medium_Model_2)": transcription_1,
        "Model 2 Output (openai/whisper-large-v3-turbo)": transcription_2,
        "Model 3 Output (openai/whisper-medium, Urdu)": transcription_3
    }
    if original_transcription:
        # Calculate Word Error Rate (WER) for all models
        wer_1 = wer(original_transcription, transcription_1)
        wer_2 = wer(original_transcription, transcription_2)
        wer_3 = wer(original_transcription, transcription_3)
        # Add WER scores to results
        comparison_result["WER Model 1"] = wer_1
        comparison_result["WER Model 2"] = wer_2
        comparison_result["WER Model 3"] = wer_3
    else:
        # With no ground truth, report the words unique to each model's output.
        # Sets are not JSON-serializable, so convert them to sorted lists
        # before handing the result to gr.JSON.
        words_1 = set(transcription_1.split())
        words_2 = set(transcription_2.split())
        words_3 = set(transcription_3.split())
        comparison_result["Difference Between Models"] = {
            "Model 1 Unique Words": sorted(words_1 - words_2 - words_3),
            "Model 2 Unique Words": sorted(words_2 - words_1 - words_3),
            "Model 3 Unique Words": sorted(words_3 - words_1 - words_2),
        }
    return comparison_result
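# Note: jiwer.wer returns a float where 0.0 is a perfect match; values above
# 1.0 are possible when the hypothesis contains many insertions relative to
# the reference.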
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcription and Comparison")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
    original_transcription = gr.Textbox(lines=2, label="Original Transcription (Optional)")
    output = gr.JSON(label="Comparison Results")
    submit_btn = gr.Button("Transcribe and Compare")
    submit_btn.click(
        transcribe_and_compare,
        inputs=[audio_input, original_transcription],
        outputs=output
    )

demo.launch(debug=True)
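# debug=True surfaces full tracebacks in the console/Space logs. For
# long-running transcriptions, calling demo.queue() before launch is an
# optional addition (not part of the original app).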