Spaces:

Enderchef
/

SuperBench-Eval

Running on Zero

App Files Community

Enderchef committed on Jun 25

Commit

6cc6a40

verified ·

1 Parent(s): ed0b9b2

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -37

app.py CHANGED Viewed

@@ -296,7 +296,7 @@ def run_evaluation(model_id, benchmark_category, subject_name, sample_count, pro
         gr.Error("An error occurred during evaluation.")
         # Return updates for error state
-        return "Error occurred during evaluation. We'll evaluate for you! If this persists, please open a community support tab for assistance.", \
                gr.update(value=detailed_error_traceback, visible=True), gr.update(visible=True), \
                gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False)
@@ -657,13 +657,13 @@ with gr.Blocks(css="""
             with gr.Column(elem_classes="gr-box"):
                 model_id_input = gr.Textbox(
                     label="Your Hugging Face Model ID",
-                    placeholder="e.g., mistralai/Mistral-7B-Instruct-v0.2",
                     interactive=True
                 )
                 # New Radio button for benchmark selection for evaluation
                 benchmark_selection_radio = gr.Radio(
-                    ["MMLU", "MMLU-Pro"],
                     label="Select Benchmark Type",
                     value="MMLU", # Default selection
                     interactive=True,
@@ -675,20 +675,22 @@ with gr.Blocks(css="""
                     benchmark_subject_dropdown = gr.Dropdown(
                         label="Choose Subject", # Label changed to be more concise
                         choices=INITIAL_GRADIO_DROPDOWN_OPTIONS, # Initial choices (MMLU subjects)
-                        value="ALL", # Default to ALL for MMLU initially
                         interactive=True,
-                        min_width=400
                     )
                     sample_count_slider = gr.Slider(
                         label="Number of Samples per Subject (1-100)",
                         minimum=1,
                         maximum=100,
-                        value=10,
                         step=1,
                         interactive=True,
-                        min_width=200
                     )
-                run_button = gr.Button("🚀 Run Evaluation", elem_classes="gr-button")
             gr.Markdown("<hr>") # Visual separator
@@ -701,29 +703,6 @@ with gr.Blocks(css="""
                     placeholder="Evaluation results will appear here."
                 )
-                # Container for debug info, initially hidden
-                with gr.Column(visible=False, elem_id="debug-error-column") as debug_error_column:
-                    error_message_output = gr.Textbox(
-                        label="Debug Information (Error Details)",
-                        lines=10, interactive=False, elem_classes="gr-output-text", elem_id="error-message-output",
-                        placeholder="Error details will appear here if an error occurs."
-                    )
-                    debug_button = gr.Button("🐛 Hide Debug Info", visible=True, elem_id="debug-button", elem_classes="gr-button")
-                with gr.Row():
-                    show_details_button = gr.Button("🔍 Show Detailed Logs", visible=False, elem_id="show-details-button", elem_classes="gr-button")
-                    download_button = gr.Button("📥 Download Full Evaluation Logs", visible=False, elem_id="download-button", elem_classes="gr-button")
-                # Detailed output, initially hidden
-                detail_output = gr.Textbox(
-                    label="Detailed Evaluation Logs",
-                    lines=20,
-                    interactive=False,
-                    elem_classes="gr-output-text",
-                    placeholder="Detailed logs for each question will appear here upon successful evaluation.",
-                    visible=False # Initially hidden
-                )
             # Define button click actions
             run_button.click(
                 run_evaluation,
@@ -748,12 +727,6 @@ with gr.Blocks(css="""
                 inputs=[detail_output], # Pass the component itself as input
                 outputs=[detail_output] # The component to update
             )
-            # Change button text based on visibility
-            show_details_button.click(
-                lambda s: "🙈 Hide Detailed Logs" if not s else "🔍 Show Detailed Logs",
-                inputs=[detail_output],
-                outputs=[show_details_button]
-            )
             # Toggle visibility of debug error column
             debug_button.click(

         gr.Error("An error occurred during evaluation.")
         # Return updates for error state
+        return "Error occurred during evaluation. We'll evaluate for you if this persists - please open a community support tab for assistance.", \
                gr.update(value=detailed_error_traceback, visible=True), gr.update(visible=True), \
                gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False)
             with gr.Column(elem_classes="gr-box"):
                 model_id_input = gr.Textbox(
                     label="Your Hugging Face Model ID",
+                    placeholder="e.g., ICONNAI/ICONN-1-Mini-Beta",
                     interactive=True
                 )
                 # New Radio button for benchmark selection for evaluation
                 benchmark_selection_radio = gr.Radio(
+                    ["MMLU"],
                     label="Select Benchmark Type",
                     value="MMLU", # Default selection
                     interactive=True,
                     benchmark_subject_dropdown = gr.Dropdown(
                         label="Choose Subject", # Label changed to be more concise
                         choices=INITIAL_GRADIO_DROPDOWN_OPTIONS, # Initial choices (MMLU subjects)
+                        value="all", # Default to ALL for MMLU initially
                         interactive=True,
+                        min_width=400,
+                        visible=False
                     )
                     sample_count_slider = gr.Slider(
                         label="Number of Samples per Subject (1-100)",
                         minimum=1,
                         maximum=100,
+                        value=100,
                         step=1,
                         interactive=True,
+                        min_width=200,
+                        visible=False
                     )
+                run_button = gr.Button("Run Evaluation", elem_classes="gr-button")
             gr.Markdown("<hr>") # Visual separator
                     placeholder="Evaluation results will appear here."
                 )
             # Define button click actions
             run_button.click(
                 run_evaluation,
                 inputs=[detail_output], # Pass the component itself as input
                 outputs=[detail_output] # The component to update
             )
             # Toggle visibility of debug error column
             debug_button.click(