sync from github
Changed files:
- README.md (+1 -1)
- app.py (+18 -17)
- requirements.txt (+1 -1)
- src/leaderboard/read_evals.py (+16 -9)
README.md CHANGED

@@ -4,7 +4,7 @@ emoji: 🔥
 colorFrom: green
 colorTo: indigo
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.36.1
 app_file: app.py
 pinned: true
 license: apache-2.0
app.py CHANGED

@@ -75,7 +75,7 @@ def restart_space():


 def init_space():
-    dataset_df = get_dataset_summary_table(file_path="blog/Hallucination-Leaderboard-Summary.csv")
+    # dataset_df = get_dataset_summary_table(file_path="blog/Hallucination-Leaderboard-Summary.csv")

     if socket.gethostname() not in {"neuromancer"}:
         # sync model_type with open-llm-leaderboard

@@ -90,7 +90,8 @@ def init_space():
     finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
         EVAL_REQUESTS_PATH, EVAL_COLS
     )
-    return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+    # return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+    return None, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df


 def add_benchmark_columns(shown_columns):

@@ -353,21 +354,21 @@ with demo:
             queue=True,
         )

-        with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-            dataset_table = gr.components.Dataframe(
-                value=dataset_df,
-                headers=list(dataset_df.columns),
-                datatype=["str", "markdown", "str", "str", "str"],
-                elem_id="dataset-table",
-                interactive=False,
-                visible=True,
-                column_widths=["15%", "20%"],
-            )
-
-            gr.Markdown(LLM_BENCHMARKS_DETAILS, elem_classes="markdown-text")
-            gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+        # with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
+        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        #     dataset_table = gr.components.Dataframe(
+        #         value=dataset_df,
+        #         headers=list(dataset_df.columns),
+        #         datatype=["str", "markdown", "str", "str", "str"],
+        #         elem_id="dataset-table",
+        #         interactive=False,
+        #         visible=True,
+        #         column_widths=["15%", "20%"],
+        #     )
+
+        #     gr.Markdown(LLM_BENCHMARKS_DETAILS, elem_classes="markdown-text")
+        #     gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")

         with gr.TabItem("Submit a model ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
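Taken together, the app.py changes disable the dataset summary: init_space() keeps its five-element return shape but now returns None where the Hallucination-Leaderboard summary dataframe used to be, and the About tab that rendered it is commented out to match. Below is a minimal, self-contained sketch of that pattern; the helper name and the CSV-loading stand-in are illustrative, not code from this Space.

from typing import Optional

import pandas as pd


def load_summary_table(csv_path: Optional[str]) -> Optional[pd.DataFrame]:
    """Illustrative stand-in for get_dataset_summary_table(): returns None when disabled."""
    if csv_path is None:
        return None
    return pd.read_csv(csv_path)


# Mirrors the commit: the summary CSV is no longer loaded, so the first element
# of the init_space() tuple is None and downstream UI code must tolerate that.
dataset_df = load_summary_table(None)

if dataset_df is None:
    print("Dataset summary disabled; skip building the About-tab dataframe.")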
requirements.txt CHANGED

@@ -4,7 +4,7 @@ APScheduler
 black
 click
 datasets
-gradio
+gradio==4.36.1
 gradio_client
 huggingface-hub
 matplotlib
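The requirements.txt pin mirrors the sdk_version: 4.36.1 set in README.md, so local installs and the Spaces runtime resolve the same Gradio release. A quick sanity check one could run after installing (nothing in the repo does this automatically; it is only an illustration):

import gradio as gr

# Expect "4.36.1" once requirements.txt and the Space's sdk_version agree.
print(gr.__version__)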
src/leaderboard/read_evals.py CHANGED

@@ -277,15 +277,22 @@ def get_raw_eval_results(results_path: str, requests_path: str, is_backend: bool

     eval_results = {}
     for model_result_filepath in tqdm(model_result_filepaths, desc="reading model_result_filepaths"):
-        # Creation of result
-        eval_result = EvalResult.init_from_json_file(model_result_filepath, is_backend=is_backend)
-        eval_result.update_with_request_file(requests_path)
-        # Store results of same eval together
-        eval_name = eval_result.eval_name
-        if eval_name in eval_results.keys():
-            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
-        else:
-            eval_results[eval_name] = eval_result
+        try:
+            # Creation of result
+            eval_result = EvalResult.init_from_json_file(model_result_filepath, is_backend=is_backend)
+            eval_result.update_with_request_file(requests_path)
+
+            # Store results of same eval together
+            eval_name = eval_result.eval_name
+            if eval_name in eval_results.keys():
+                eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+            else:
+                eval_results[eval_name] = eval_result
+
+        except (FileNotFoundError, ValueError, KeyError, json.JSONDecodeError) as e:
+            # Log the error and continue with the next file
+            print(f"Error processing file {model_result_filepath}: {e}")
+            continue

     results = []
     for v in eval_results.values():
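The try/except added to get_raw_eval_results means one missing or malformed result file is logged and skipped instead of aborting the whole leaderboard build; the guard references json.JSONDecodeError and therefore assumes read_evals.py imports json. A standalone sketch of the same skip-and-log pattern follows; the directory layout and the "config"/"results" keys are illustrative, not the leaderboard's actual schema.

import json
from pathlib import Path


def read_result_files(results_dir: str) -> dict[str, dict]:
    """Collect per-model results, skipping files that are missing, malformed, or incomplete."""
    results: dict[str, dict] = {}
    for path in Path(results_dir).glob("**/*.json"):
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
            # KeyError plays the same role here as in the real loader: a file can be
            # valid JSON and still lack the fields the leaderboard expects.
            model_name = data["config"]["model_name"]
            results[model_name] = data["results"]
        except (FileNotFoundError, ValueError, KeyError, json.JSONDecodeError) as e:
            print(f"Error processing file {path}: {e}")
            continue
    return results

Since json.JSONDecodeError is a subclass of ValueError, the exception tuple is slightly redundant, but listing both keeps the intent explicit and matches the diff.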