Spaces:
Running
Running
jhj0517
committed on
Commit
·
c7bfcf2
1
Parent(s):
5fee9a3
Calculate WER between gen result & answer
Browse files
- tests/test_transcription.py +13 -9
tests/test_transcription.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from modules.whisper.whisper_factory import WhisperFactory
|
| 2 |
from modules.whisper.data_classes import *
|
|
|
|
| 3 |
from modules.utils.paths import WEBUI_DIR
|
| 4 |
from test_config import *
|
| 5 |
|
|
@@ -28,6 +29,10 @@ def test_transcribe(
|
|
| 28 |
if not os.path.exists(audio_path):
|
| 29 |
download_file(TEST_FILE_DOWNLOAD_URL, audio_path_dir)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
whisper_inferencer = WhisperFactory.create_whisper_inference(
|
| 32 |
whisper_type=whisper_type,
|
| 33 |
)
|
|
@@ -54,7 +59,7 @@ def test_transcribe(
|
|
| 54 |
),
|
| 55 |
).to_list()
|
| 56 |
|
| 57 |
-
subtitle_str,
|
| 58 |
[audio_path],
|
| 59 |
None,
|
| 60 |
"SRT",
|
|
@@ -62,12 +67,11 @@ def test_transcribe(
|
|
| 62 |
gr.Progress(),
|
| 63 |
*hparams,
|
| 64 |
)
|
| 65 |
-
|
| 66 |
-
assert
|
| 67 |
-
assert isinstance(file_path[0], str) and file_path
|
| 68 |
|
| 69 |
if not is_pytube_detected_bot():
|
| 70 |
-
whisper_inferencer.transcribe_youtube(
|
| 71 |
TEST_YOUTUBE_URL,
|
| 72 |
"SRT",
|
| 73 |
False,
|
|
@@ -75,17 +79,17 @@ def test_transcribe(
|
|
| 75 |
*hparams,
|
| 76 |
)
|
| 77 |
assert isinstance(subtitle_str, str) and subtitle_str
|
| 78 |
-
assert
|
| 79 |
|
| 80 |
-
whisper_inferencer.transcribe_mic(
|
| 81 |
audio_path,
|
| 82 |
"SRT",
|
| 83 |
False,
|
| 84 |
gr.Progress(),
|
| 85 |
*hparams,
|
| 86 |
)
|
| 87 |
-
|
| 88 |
-
assert
|
| 89 |
|
| 90 |
|
| 91 |
def download_file(url, save_dir):
|
|
|
|
| 1 |
from modules.whisper.whisper_factory import WhisperFactory
|
| 2 |
from modules.whisper.data_classes import *
|
| 3 |
+
from modules.utils.subtitle_manager import read_file
|
| 4 |
from modules.utils.paths import WEBUI_DIR
|
| 5 |
from test_config import *
|
| 6 |
|
|
|
|
| 29 |
if not os.path.exists(audio_path):
|
| 30 |
download_file(TEST_FILE_DOWNLOAD_URL, audio_path_dir)
|
| 31 |
|
| 32 |
+
answer = TEST_ANSWER
|
| 33 |
+
if diarization:
|
| 34 |
+
answer = "SPEAKER_00|"+TEST_ANSWER
|
| 35 |
+
|
| 36 |
whisper_inferencer = WhisperFactory.create_whisper_inference(
|
| 37 |
whisper_type=whisper_type,
|
| 38 |
)
|
|
|
|
| 59 |
),
|
| 60 |
).to_list()
|
| 61 |
|
| 62 |
+
subtitle_str, file_paths = whisper_inferencer.transcribe_file(
|
| 63 |
[audio_path],
|
| 64 |
None,
|
| 65 |
"SRT",
|
|
|
|
| 67 |
gr.Progress(),
|
| 68 |
*hparams,
|
| 69 |
)
|
| 70 |
+
subtitle = read_file(file_paths[0]).split("\n")
|
| 71 |
+
assert calculate_wer(answer, subtitle[2].strip().replace(",", "").replace(".", "")) < 0.1
|
|
|
|
| 72 |
|
| 73 |
if not is_pytube_detected_bot():
|
| 74 |
+
subtitle_str, file_path = whisper_inferencer.transcribe_youtube(
|
| 75 |
TEST_YOUTUBE_URL,
|
| 76 |
"SRT",
|
| 77 |
False,
|
|
|
|
| 79 |
*hparams,
|
| 80 |
)
|
| 81 |
assert isinstance(subtitle_str, str) and subtitle_str
|
| 82 |
+
assert os.path.exists(file_path)
|
| 83 |
|
| 84 |
+
subtitle_str, file_path = whisper_inferencer.transcribe_mic(
|
| 85 |
audio_path,
|
| 86 |
"SRT",
|
| 87 |
False,
|
| 88 |
gr.Progress(),
|
| 89 |
*hparams,
|
| 90 |
)
|
| 91 |
+
subtitle = read_file(file_path).split("\n")
|
| 92 |
+
assert calculate_wer(answer, subtitle[2].strip().replace(",", "").replace(".", "")) < 0.1
|
| 93 |
|
| 94 |
|
| 95 |
def download_file(url, save_dir):
|