Spaces
esc-bencher committed
Commit · d3cf2d5
1 Parent(s): 671c314
Update app.py
app.py
CHANGED
@@ -16,21 +16,22 @@ SUBMISSION_URL = os.path.join(
 )
 
 TEST_SETS = [
-    "librispeech-clean",
-    "librispeech-other",
-    "common-voice-9",
-    "vox-populi",
-    "ted-lium",
-    "giga-speech",
-    "spgi-speech",
-    "earnings-22",
+    "ls-clean",
+    "ls-other",
+    "cv",
+    "vox",
+    "ted",
+    "giga",
+    "spgi",
+    "earnings",
     "ami",
 ]
 EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
-OPTIONAL_TEST_SETS = ["switch-board", "call-home", "chime-4"]
 
-
+OPTIONAL_TEST_SETS = ["swbd", "ch", "chime-4"]
+OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
 
+CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
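For quick reference, here is what the renamed constants expand to, i.e. the exact filenames the Space now expects in a submission folder. This is an editor's illustration derived from the hunk above, not part of the commit:

TEST_SETS = ["ls-clean", "ls-other", "cv", "vox", "ted", "giga", "spgi", "earnings", "ami"]
OPTIONAL_TEST_SETS = ["swbd", "ch", "chime-4"]

EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]

print(EXPECTED_TEST_FILES)
# ['ls-clean.txt', 'ls-other.txt', 'cv.txt', 'vox.txt', 'ted.txt',
#  'giga.txt', 'spgi.txt', 'earnings.txt', 'ami.txt']
print(OPTIONAL_TEST_FILES)
# ['swbd.txt', 'ch.txt', 'chime-4.txt']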
@@ -60,22 +61,6 @@ all_submissions = [
     if os.path.isdir(os.path.join(SUBMISSION_NAME, folder)) and folder != ".git"
 ]
 
-
-COLUMN_NAMES = {
-    "librispeech-clean": "ls-clean",
-    "librispeech-other": "ls-other",
-    "common-voice-9": "cv9",
-    "vox-populi": "vox",
-    "ted-lium": "ted",
-    "giga-speech": "giga",
-    "spgi-speech": "spgi",
-    "earnings-22": "e22",
-    "ami": "ami",
-    "chime-4": "chime",
-    "switch-board": "swbd",
-    "call-home": "ch",
-}
-
 all_results = read_csv(CSV_RESULTS_FILE)
 
 
@@ -85,12 +70,10 @@ table = all_results.copy()
 esc_column = table.pop("esc-score")
 name_column = table.pop("name")
 table.insert(0, "esc-score", esc_column)
-
-table = table.select_dtypes(exclude=['object', 'string'])  # * 100
+table = table.select_dtypes(exclude=['object', 'string'])
 table.insert(0, "name", name_column)
+table = table.sort_values(by=['esc-score'], ascending=True, ignore_index=True)
 table = table.round(2)
-table = table.rename(columns=COLUMN_NAMES)
-# start indexing from 1
 table.index = table.index + 1
 
 # Streamlit
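For readers less familiar with the pandas idioms in this hunk: pop removes and returns a column, insert places it at a given position, and the newly added sort_values orders rows by ascending ESC score (a lower average WER is better). A toy run of the same table-shaping steps, with made-up column values:

import pandas as pd

# Toy results frame; column names mirror the app's, values are invented.
table = pd.DataFrame({
    "ls-clean": [3.2, 5.4],
    "name": ["system-b", "system-a"],
    "esc-score": [9.876, 12.345],
})

esc_column = table.pop("esc-score")
name_column = table.pop("name")
table.insert(0, "esc-score", esc_column)
table = table.select_dtypes(exclude=["object", "string"])  # keep numeric columns only
table.insert(0, "name", name_column)
# ascending=True: the lowest ESC score ranks first
table = table.sort_values(by=["esc-score"], ascending=True, ignore_index=True)
table = table.round(2)
table.index = table.index + 1  # leaderboard positions start from 1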
@@ -100,11 +83,11 @@ st.markdown(
     f"""
     This is the leaderboard of the End-to end Speech Challenge (ESC).
     Submitted systems are ranked by the **ESC Score** which is the average of
-    all non-optional datasets: {…
+    all non-optional datasets: {", ".join(TEST_SETS)}. The optional datasets of swbd, ch and chime-4 do not contribute to the ESC score."""
 )
 
 # st.table(table)
-st.dataframe(table.style.format(subset=["esc-score", *…
+st.dataframe(table.style.format(subset=["esc-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
 
 st.markdown(
     """
@@ -118,7 +101,14 @@ st.markdown(
     """
 )
 
-st.markdown(…
+st.markdown(
+    """
+    ## Submitting to ESC
+    \n
+    To submit to ESC, download the audio data for the nine mandatory ESC test sets from [esc-datasets](https://huggingface.co/datasets/esc-benchmark/esc-datasets). The test sets contain audio data only. Evaluate your system on the nine test sets by generating predictions for the unlabelled audio samples. For each test set, save the predictions to a .txt file in the order that the audio samples are provided, with one prediction per line. Name the .txt file according to the ESC test set names shown in the table (e.g. the predictions for LibriSpeech test-clean should be named ls-clean.txt).
+    \n
+    Once you have evaluated your system on all nine test sets, move the predictions into one folder and zip it. The name you assign to the zipped folder will be the name that is shown on the table (e.g. whisper-aed.zip will be displayed as whisper-aed). Upload your zipped submissions for scoring and placement on the leaderboard.
+    """
 )
 
 # Using the "with" syntax
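The new submission instructions above are mechanical enough to script. Below is a minimal sketch of preparing a submission archive under those rules; transcribe() and load_test_set() are hypothetical placeholders for your own model and data-loading code, and whisper-aed is just an example name:

import os
import shutil

def transcribe(audio_sample) -> str:
    ...  # hypothetical: your ASR system's inference function

TEST_SETS = ["ls-clean", "ls-other", "cv", "vox", "ted", "giga", "spgi", "earnings", "ami"]
submission_name = "whisper-aed"  # the archive name becomes the leaderboard entry name

os.makedirs(submission_name, exist_ok=True)
for test_set in TEST_SETS:
    samples = load_test_set(test_set)  # hypothetical loader for the esc-datasets audio
    # one prediction per line, in the order the audio samples are provided
    with open(os.path.join(submission_name, f"{test_set}.txt"), "w") as f:
        for sample in samples:
            f.write(transcribe(sample) + "\n")

# zip the folder for upload, e.g. whisper-aed.zip
shutil.make_archive(submission_name, "zip", root_dir=".", base_dir=submission_name)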
@@ -138,9 +128,10 @@ if submit_button:
 
     with st.spinner(f"Computing ESC Score for {submission}..."):
         results = {"name": submission}
-
+        all_submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))
 
-        submitted_files = [f for f in …
+        submitted_files = [f for f in all_submitted_files if f in EXPECTED_TEST_FILES]
+        submitted_optional_files = [f for f in all_submitted_files if f in OPTIONAL_TEST_FILES]
 
         if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
            raise ValueError(
@@ -152,16 +143,27 @@
             pred_file = os.path.join(SUBMISSION_NAME, submission, file)
 
             wer = compute_wer(pred_file, ref_file)
-            results[file.split(".")[0]] = …
+            results[file.split(".")[0]] = round(100 * wer, 2)
+
+        for file in submitted_optional_files:
+            ref_file = os.path.join(REFERENCE_NAME, file)
+            pred_file = os.path.join(SUBMISSION_NAME, submission, file)
+
+            wer = compute_wer(pred_file, ref_file)
+            results[file.split(".")[0]] = round(100 * wer, 2)
 
+        # ESC score is computed over the mandatory test sets only
         wer_values = [float(results[t]) for t in TEST_SETS]
+        # first average over LS test sets
+        wer_values = [sum(wer_values[:2]) / 2, *wer_values[2:]]
+        # then macro-average over ESC test sets
         all_wer = sum(wer_values) / len(wer_values)
 
-        results["esc-score"] = all_wer
+        results["esc-score"] = round(all_wer, 2)
         all_results = all_results.append(results, ignore_index=True)
 
         # save and upload new evaluated results
-        all_results.to_csv(CSV_RESULTS_FILE)
+        all_results.to_csv(CSV_RESULTS_FILE, index=False)
         commit_url = submission_repo.push_to_hub()
 
     st.success('Please refresh this space (CTRL+R) to see your result')
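The comments in the final hunk describe the new two-stage averaging: the two LibriSpeech WERs are first averaged into a single value, and the macro-average is then taken over the resulting eight test-set values. A standalone sketch of that aggregation, with made-up WER numbers purely for illustration:

TEST_SETS = ["ls-clean", "ls-other", "cv", "vox", "ted", "giga", "spgi", "earnings", "ami"]

# Invented per-test-set WERs (in %) for illustration only.
results = {"ls-clean": 4.0, "ls-other": 8.0, "cv": 12.0, "vox": 10.0, "ted": 6.0,
           "giga": 11.0, "spgi": 5.0, "earnings": 15.0, "ami": 17.0}

wer_values = [float(results[t]) for t in TEST_SETS]
# first average over the two LibriSpeech test sets: (4.0 + 8.0) / 2 = 6.0
wer_values = [sum(wer_values[:2]) / 2, *wer_values[2:]]
# then macro-average over the remaining eight values:
# (6.0 + 12.0 + 10.0 + 6.0 + 11.0 + 5.0 + 15.0 + 17.0) / 8 = 10.25
esc_score = sum(wer_values) / len(wer_values)
print(round(esc_score, 2))  # 10.25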
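The compute_wer helper called throughout the diff is not shown in this commit. A plausible stand-in, assuming line-aligned prediction and reference text files and using the third-party jiwer library; this is an editor's assumption, not necessarily what the Space actually does (e.g. it may normalise text first):

import jiwer  # assumed WER library; not confirmed by the diff

def compute_wer(pred_file: str, ref_file: str) -> float:
    """Corpus-level WER between line-aligned prediction and reference files."""
    with open(pred_file) as f:
        predictions = [line.strip() for line in f]
    with open(ref_file) as f:
        references = [line.strip() for line in f]
    if len(predictions) != len(references):
        raise ValueError("prediction and reference files must have the same number of lines")
    # jiwer.wer returns a fraction (e.g. 0.042); the app multiplies by 100 for display
    return jiwer.wer(references, predictions)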