maliahson committed on
Commit 85a1190 · verified · 1 Parent(s): c68f1dc

Create app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
+ import gradio as gr
+ from transformers import pipeline
+ from jiwer import wer
+ import difflib
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ # Load Model 1
+ model_name_1 = "maliahson/whisper-agri"
+ whisper_pipeline_1 = pipeline("automatic-speech-recognition", model=model_name_1)
+
+ # Load Model 2
+ model_name_2 = "openai/whisper-large-v3-turbo"
+ whisper_pipeline_2 = pipeline("automatic-speech-recognition", model=model_name_2)
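+ # Note: both pipelines download model weights on first run, accept an audio
+ # filepath, and return a dict with a "text" key; pass device=0 to pipeline(...)
+ # to run on a GPU if one is available.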
+
+ def compare_transcriptions(audio, original_transcription=None):
+     """
+     Compare transcriptions from the two models and optionally calculate WER.
+
+     Args:
+         audio: Filepath of the uploaded audio.
+         original_transcription: The ground-truth transcription (optional).
+
+     Returns:
+         A dict with both transcriptions and comparison results, plus an
+         optional matplotlib figure.
+     """
+     # Transcribe audio using Model 1
+     transcription_1 = whisper_pipeline_1(audio)["text"]
+
+     # Transcribe audio using Model 2
+     transcription_2 = whisper_pipeline_2(audio)["text"]
+
+     result = {
+         "Model 1 Transcription": transcription_1,
+         "Model 2 Transcription": transcription_2
+     }
+
+     if original_transcription:
+         # Calculate WER for both models
+         wer_1 = wer(original_transcription, transcription_1)
+         wer_2 = wer(original_transcription, transcription_2)
+         result["WER (Model 1)"] = f"{wer_1:.2%}"
+         result["WER (Model 2)"] = f"{wer_2:.2%}"
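+         # WER = (substitutions + deletions + insertions) / words in the
+         # reference; lower is better, and it can exceed 100% when the
+         # hypothesis is much longer than the reference.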
+
+         # Create a WER comparison plot
+         fig, ax = plt.subplots()
+         bars = ax.bar(["Model 1", "Model 2"], [wer_1, wer_2], color=["blue", "orange"])
+         ax.set_title("Word Error Rate Comparison")
+         ax.set_ylabel("WER")
+         ax.set_ylim(0, 1)
+         ax.bar_label(bars, fmt="%.2f")
+         plt.tight_layout()
+         return result, fig
+     else:
+         # Calculate and visualize differences between transcriptions
+         diff = difflib.unified_diff(
+             transcription_1.split(),
+             transcription_2.split(),
+             lineterm="",
+             n=0
+         )
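+         # unified_diff runs word-by-word here: "-" lines are words that appear
+         # only in Model 1's output, "+" lines only in Model 2's.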
+         diff_result = "\n".join(diff)
+         result["Differences"] = diff_result
+         return result, None
+
+ # Gradio Interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Audio Transcription Comparison with Whisper Models")
+     gr.Markdown("Upload an audio file, and optionally provide the original transcription to compare the outputs.")
+
+     with gr.Row():
+         audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
+         original_transcription = gr.Textbox(
+             placeholder="Enter original transcription here (optional)",
+             label="Original Transcription"
+         )
+
+     compare_button = gr.Button("Compare Transcriptions")
+     result_output = gr.JSON(label="Transcriptions and Comparison")
+     wer_plot = gr.Plot(label="Word Error Rate Comparison (if applicable)")
+
+     compare_button.click(
+         compare_transcriptions,
+         inputs=[audio_input, original_transcription],
+         outputs=[result_output, wer_plot]
+     )
+
+ # Launch app with request queuing (launch(enable_queue=True) was removed in
+ # Gradio 4.x; queue() is the current API)
+ demo.queue().launch()
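For a quick local check outside the Gradio UI, the comparison function can also be called directly. This is a minimal sketch, assuming a hypothetical local file sample.wav and a made-up reference sentence; neither is part of this repo:

# Hypothetical local check: "sample.wav" and the reference text below
# are placeholder assumptions.
result, fig = compare_transcriptions(
    "sample.wav",
    original_transcription="wheat crops need irrigation in early spring",
)
print(result["WER (Model 1)"], result["WER (Model 2)"])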