Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -296,7 +296,7 @@ def run_evaluation(model_id, benchmark_category, subject_name, sample_count, pro
|
|
296 |
gr.Error("An error occurred during evaluation.")
|
297 |
|
298 |
# Return updates for error state
|
299 |
-
return "Error occurred during evaluation. We'll evaluate for you
|
300 |
gr.update(value=detailed_error_traceback, visible=True), gr.update(visible=True), \
|
301 |
gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False)
|
302 |
|
@@ -657,13 +657,13 @@ with gr.Blocks(css="""
|
|
657 |
with gr.Column(elem_classes="gr-box"):
|
658 |
model_id_input = gr.Textbox(
|
659 |
label="Your Hugging Face Model ID",
|
660 |
-
placeholder="e.g.,
|
661 |
interactive=True
|
662 |
)
|
663 |
|
664 |
# New Radio button for benchmark selection for evaluation
|
665 |
benchmark_selection_radio = gr.Radio(
|
666 |
-
["MMLU"
|
667 |
label="Select Benchmark Type",
|
668 |
value="MMLU", # Default selection
|
669 |
interactive=True,
|
@@ -675,20 +675,22 @@ with gr.Blocks(css="""
|
|
675 |
benchmark_subject_dropdown = gr.Dropdown(
|
676 |
label="Choose Subject", # Label changed to be more concise
|
677 |
choices=INITIAL_GRADIO_DROPDOWN_OPTIONS, # Initial choices (MMLU subjects)
|
678 |
-
value="
|
679 |
interactive=True,
|
680 |
-
min_width=400
|
|
|
681 |
)
|
682 |
sample_count_slider = gr.Slider(
|
683 |
label="Number of Samples per Subject (1-100)",
|
684 |
minimum=1,
|
685 |
maximum=100,
|
686 |
-
value=
|
687 |
step=1,
|
688 |
interactive=True,
|
689 |
-
min_width=200
|
|
|
690 |
)
|
691 |
-
run_button = gr.Button("
|
692 |
|
693 |
gr.Markdown("<hr>") # Visual separator
|
694 |
|
@@ -701,29 +703,6 @@ with gr.Blocks(css="""
|
|
701 |
placeholder="Evaluation results will appear here."
|
702 |
)
|
703 |
|
704 |
-
# Container for debug info, initially hidden
|
705 |
-
with gr.Column(visible=False, elem_id="debug-error-column") as debug_error_column:
|
706 |
-
error_message_output = gr.Textbox(
|
707 |
-
label="Debug Information (Error Details)",
|
708 |
-
lines=10, interactive=False, elem_classes="gr-output-text", elem_id="error-message-output",
|
709 |
-
placeholder="Error details will appear here if an error occurs."
|
710 |
-
)
|
711 |
-
debug_button = gr.Button("π Hide Debug Info", visible=True, elem_id="debug-button", elem_classes="gr-button")
|
712 |
-
|
713 |
-
with gr.Row():
|
714 |
-
show_details_button = gr.Button("π Show Detailed Logs", visible=False, elem_id="show-details-button", elem_classes="gr-button")
|
715 |
-
download_button = gr.Button("📥 Download Full Evaluation Logs", visible=False, elem_id="download-button", elem_classes="gr-button")
|
716 |
-
|
717 |
-
# Detailed output, initially hidden
|
718 |
-
detail_output = gr.Textbox(
|
719 |
-
label="Detailed Evaluation Logs",
|
720 |
-
lines=20,
|
721 |
-
interactive=False,
|
722 |
-
elem_classes="gr-output-text",
|
723 |
-
placeholder="Detailed logs for each question will appear here upon successful evaluation.",
|
724 |
-
visible=False # Initially hidden
|
725 |
-
)
|
726 |
-
|
727 |
# Define button click actions
|
728 |
run_button.click(
|
729 |
run_evaluation,
|
@@ -748,12 +727,6 @@ with gr.Blocks(css="""
|
|
748 |
inputs=[detail_output], # Pass the component itself as input
|
749 |
outputs=[detail_output] # The component to update
|
750 |
)
|
751 |
-
# Change button text based on visibility
|
752 |
-
show_details_button.click(
|
753 |
-
lambda s: "π Hide Detailed Logs" if not s else "π Show Detailed Logs",
|
754 |
-
inputs=[detail_output],
|
755 |
-
outputs=[show_details_button]
|
756 |
-
)
|
757 |
|
758 |
# Toggle visibility of debug error column
|
759 |
debug_button.click(
|
|
|
296 |
gr.Error("An error occurred during evaluation.")
|
297 |
|
298 |
# Return updates for error state
|
299 |
+
return "Error occurred during evaluation. We'll evaluate for you if this persists - please open a community support tab for assistance.", \
|
300 |
gr.update(value=detailed_error_traceback, visible=True), gr.update(visible=True), \
|
301 |
gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False)
|
302 |
|
|
|
657 |
with gr.Column(elem_classes="gr-box"):
|
658 |
model_id_input = gr.Textbox(
|
659 |
label="Your Hugging Face Model ID",
|
660 |
+
placeholder="e.g., ICONNAI/ICONN-1-Mini-Beta",
|
661 |
interactive=True
|
662 |
)
|
663 |
|
664 |
# New Radio button for benchmark selection for evaluation
|
665 |
benchmark_selection_radio = gr.Radio(
|
666 |
+
["MMLU"],
|
667 |
label="Select Benchmark Type",
|
668 |
value="MMLU", # Default selection
|
669 |
interactive=True,
|
|
|
675 |
benchmark_subject_dropdown = gr.Dropdown(
|
676 |
label="Choose Subject", # Label changed to be more concise
|
677 |
choices=INITIAL_GRADIO_DROPDOWN_OPTIONS, # Initial choices (MMLU subjects)
|
678 |
+
value="all", # Default to ALL for MMLU initially
|
679 |
interactive=True,
|
680 |
+
min_width=400,
|
681 |
+
visible=False
|
682 |
)
|
683 |
sample_count_slider = gr.Slider(
|
684 |
label="Number of Samples per Subject (1-100)",
|
685 |
minimum=1,
|
686 |
maximum=100,
|
687 |
+
value=100,
|
688 |
step=1,
|
689 |
interactive=True,
|
690 |
+
min_width=200,
|
691 |
+
visible=False
|
692 |
)
|
693 |
+
run_button = gr.Button("Run Evaluation", elem_classes="gr-button")
|
694 |
|
695 |
gr.Markdown("<hr>") # Visual separator
|
696 |
|
|
|
703 |
placeholder="Evaluation results will appear here."
|
704 |
)
|
705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
706 |
# Define button click actions
|
707 |
run_button.click(
|
708 |
run_evaluation,
|
|
|
727 |
inputs=[detail_output], # Pass the component itself as input
|
728 |
outputs=[detail_output] # The component to update
|
729 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
730 |
|
731 |
# Toggle visibility of debug error column
|
732 |
debug_button.click(
|