Spaces: Create app.py (Space status: Runtime error)
app.py (added) — a Gradio app that transcribes an uploaded audio clip with two Whisper models and compares the results:
import gradio as gr
from transformers import pipeline
from jiwer import wer
import difflib
import matplotlib.pyplot as plt

# Load Model 1: the fine-tuned checkpoint
model_name_1 = "maliahson/whisper-agri"
whisper_pipeline_1 = pipeline("automatic-speech-recognition", model=model_name_1)

# Load Model 2: the off-the-shelf baseline
model_name_2 = "openai/whisper-large-v3-turbo"
whisper_pipeline_2 = pipeline("automatic-speech-recognition", model=model_name_2)

def compare_transcriptions(audio, original_transcription=None):
    """
    Compare transcriptions from the two models and optionally calculate WER.

    Args:
        audio: Path to the uploaded audio file.
        original_transcription: The ground-truth transcription (optional).

    Returns:
        A dict with both transcriptions and comparison results, plus a WER
        bar chart (or None when no ground truth is given).
    """
    # Transcribe the audio with both models
    transcription_1 = whisper_pipeline_1(audio)["text"]
    transcription_2 = whisper_pipeline_2(audio)["text"]

    result = {
        "Model 1 Transcription": transcription_1,
        "Model 2 Transcription": transcription_2,
    }

    if original_transcription:
        # Calculate WER for both models against the ground truth
        wer_1 = wer(original_transcription, transcription_1)
        wer_2 = wer(original_transcription, transcription_2)
        result["WER (Model 1)"] = f"{wer_1:.2%}"
        result["WER (Model 2)"] = f"{wer_2:.2%}"

        # Create a WER comparison plot
        fig, ax = plt.subplots()
        bars = ax.bar(["Model 1", "Model 2"], [wer_1, wer_2], color=["blue", "orange"])
        ax.set_title("Word Error Rate Comparison")
        ax.set_ylabel("WER")
        ax.set_ylim(0, 1)
        ax.bar_label(bars, fmt="%.2f")
        plt.tight_layout()
        return result, fig
    else:
        # No ground truth: show a word-level diff between the two outputs
        diff = difflib.unified_diff(
            transcription_1.split(),
            transcription_2.split(),
            lineterm="",
            n=0,
        )
        result["Differences"] = "\n".join(diff)
        return result, None

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Audio Transcription Comparison with Whisper Models")
    gr.Markdown("Upload an audio file, and optionally provide the original transcription to compare the outputs.")

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
        original_transcription = gr.Textbox(
            placeholder="Enter original transcription here (optional)",
            label="Original Transcription",
        )

    compare_button = gr.Button("Compare Transcriptions")
    result_output = gr.JSON(label="Transcriptions and Comparison")
    wer_plot = gr.Plot(label="Word Error Rate Comparison (if applicable)")

    compare_button.click(
        compare_transcriptions,
        inputs=[audio_input, original_transcription],
        outputs=[result_output, wer_plot],
    )

# Launch the app; queue() replaces the enable_queue launch argument removed in Gradio 4
demo.queue().launch()
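The Space's "Runtime error" status is most plausibly explained by two Gradio 3.x arguments in the original file: gr.Audio(source="upload") and demo.launch(enable_queue=True), both removed in Gradio 4 (source became sources, and queuing moved to demo.queue()); the listing above uses the current API. For the Space to build, it also needs a requirements.txt alongside app.py. A minimal sketch, with unpinned versions and torch assumed as the backend for the transformers pipelines (decoding compressed audio may additionally require ffmpeg on the host):

gradio
transformers
torch
jiwer
matplotlib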
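As a sanity check on the metric the app reports: jiwer's wer() returns (substitutions + deletions + insertions) divided by the number of words in the reference, so it can exceed 1 for very noisy hypotheses. A tiny self-contained example with hypothetical strings, unrelated to the Space:

from jiwer import wer

# One inserted word ("there") against a two-word reference: WER = 1/2
print(wer("hello world", "hello there world"))  # 0.5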