maliahson committed
Commit a730d1e · verified · 1 Parent(s): 85a1190

Update app.py

Files changed (1):
  1. app.py +24 -73
app.py CHANGED
@@ -1,87 +1,38 @@
 import gradio as gr
+import torch
 from transformers import pipeline
-from jiwer import wer
-import difflib
-import matplotlib.pyplot as plt
-import numpy as np
 
-# Load Model 1
-model_name_1 = "maliahson/whisper-agri"
-whisper_pipeline_1 = pipeline("automatic-speech-recognition", model=model_name_1)
-
-# Load Model 2
-model_name_2 = "openai/whisper-large-v3-turbo"
-whisper_pipeline_2 = pipeline("automatic-speech-recognition", model=model_name_2)
+# Load models
+whisper_pipeline_1 = pipeline("automatic-speech-recognition", model="maliahson/whisper-agri")
+device = 0 if torch.cuda.is_available() else "cpu"
+whisper_pipeline_2 = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo", device=device)
 
-def compare_transcriptions(audio, original_transcription=None):
-    """
-    Compares transcriptions from two models and optionally calculates WER.
-    Args:
-        audio: Uploaded audio file.
-        original_transcription: The ground-truth transcription (optional).
-    Returns:
-        Transcriptions from both models, and comparison results.
-    """
-    # Transcribe audio using Model 1
-    transcription_1 = whisper_pipeline_1(audio)["text"]
-
-    # Transcribe audio using Model 2
-    transcription_2 = whisper_pipeline_2(audio)["text"]
-
-    result = {
-        "Model 1 Transcription": transcription_1,
-        "Model 2 Transcription": transcription_2
-    }
+def transcribe_and_compare(audio_path, original_transcription=None):
+    transcription_1 = whisper_pipeline_1(audio_path)["text"]
+    transcription_2 = whisper_pipeline_2(audio_path)["text"]
+    comparison_result = {"Model 1 Output": transcription_1, "Model 2 Output": transcription_2}
 
     if original_transcription:
-        # Calculate WER for both models
+        from jiwer import wer
         wer_1 = wer(original_transcription, transcription_1)
         wer_2 = wer(original_transcription, transcription_2)
-        result["WER (Model 1)"] = f"{wer_1:.2%}"
-        result["WER (Model 2)"] = f"{wer_2:.2%}"
-
-        # Create a WER comparison plot
-        fig, ax = plt.subplots()
-        bars = ax.bar(["Model 1", "Model 2"], [wer_1, wer_2], color=["blue", "orange"])
-        ax.set_title("Word Error Rate Comparison")
-        ax.set_ylabel("WER")
-        ax.set_ylim(0, 1)
-        ax.bar_label(bars, fmt="%.2f")
-        plt.tight_layout()
-        return result, fig
-    else:
-        # Calculate and visualize differences between transcriptions
-        diff = difflib.unified_diff(
-            transcription_1.split(),
-            transcription_2.split(),
-            lineterm="",
-            n=0
-        )
-        diff_result = "\n".join(diff)
-        result["Differences"] = diff_result
-        return result, None
+        comparison_result["WER Model 1"] = wer_1
+        comparison_result["WER Model 2"] = wer_2
+
+    return comparison_result
 
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription Comparison with Whisper Models")
-    gr.Markdown("Upload an audio file, and optionally provide the original transcription to compare the outputs.")
-
-    with gr.Row():
-        audio_input = gr.Audio(source="upload", type="filepath", label="Upload or Record Audio")
-        original_transcription = gr.Textbox(
-            placeholder="Enter original transcription here (optional)",
-            label="Original Transcription"
-        )
-
-    compare_button = gr.Button("Compare Transcriptions")
-    result_output = gr.JSON(label="Transcriptions and Comparison")
-    wer_plot = gr.Plot(label="Word Error Rate Comparison (if applicable)")
-
-    compare_button.click(
-        compare_transcriptions,
+    gr.Markdown("## Audio Transcription and Comparison")
+    audio_input = gr.Audio(type="filepath", label="Upload or Record Audio (supports upload and recording)")
+    original_transcription = gr.Textbox(lines=2, label="Original Transcription (Optional)")
+    output = gr.JSON(label="Comparison Results")
+    submit_btn = gr.Button("Transcribe and Compare")
+
+    submit_btn.click(
+        transcribe_and_compare,
         inputs=[audio_input, original_transcription],
-        outputs=[result_output, wer_plot]
+        outputs=output
     )
 
-# Launch app
-demo.launch(enable_queue=True)
+demo.launch(debug=True)
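
Note on the new model loading: the commit computes a `device` but passes it only to the second pipeline, so `maliahson/whisper-agri` still runs on CPU even when a GPU is available. A minimal sketch of loading both checkpoints on the same device (same model IDs as the diff; whether the asymmetry is intentional is not stated in the commit):

import torch
from transformers import pipeline

# Pick GPU 0 when available, otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Both checkpoints from the diff, now placed on the same device.
whisper_pipeline_1 = pipeline("automatic-speech-recognition", model="maliahson/whisper-agri", device=device)
whisper_pipeline_2 = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo", device=device)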
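
The rewritten function also stores raw WER floats where the old code formatted them as percentages (`f"{wer_1:.2%}"`). A small usage sketch of `jiwer.wer` as the new code calls it, with illustrative strings that are not from the repo:

from jiwer import wer

reference = "the wheat crop was harvested in april"   # hypothetical ground truth
hypothesis = "the wheat crop was harvest in april"    # hypothetical model output

error_rate = wer(reference, hypothesis)  # 1 substitution over 7 reference words -> ~0.1429
print(error_rate)                        # raw float, as the new code stores it
print(f"{error_rate:.2%}")               # "14.29%", as the old code displayed it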
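
The UI changes track Gradio API churn: `source="upload"` is dropped from `gr.Audio` (Gradio 4.x replaced the `source` string with a `sources` list) and `enable_queue` is no longer passed to `launch()` (queueing moved to `demo.queue()`). A sketch of the equivalent declarations under a 4.x SDK, assuming that upgrade is what motivated the commit:

import gradio as gr

with gr.Blocks() as demo:
    # `sources` (a list) replaced the old `source` string argument,
    # so upload and microphone input can be offered together.
    audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")

demo.queue()   # replaces launch(enable_queue=True)
demo.launch(debug=True)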