[data] fixing visual
- app.py +29 -59
- leaderboard_data.csv +2 -1
app.py
CHANGED
```diff
@@ -16,12 +16,13 @@ from src.display.css_html_js import custom_css
 ### Space initialisation
 
 
-
-
-
+# Load with multi-header and set the first column ('Model') as index
+LEADERBOARD_DF = pd.read_csv("leaderboard_data.csv", header=[0, 1], index_col=0)
+# No need to astype(str) globally here, Gradio DataFrame handles it well.
+# If specific styling or type issues arise, it can be done selectively.
 
 BIAS_DF = pd.read_csv("bias_evaluation_data.csv")
-BIAS_DF = BIAS_DF.astype(str).fillna("-")
+BIAS_DF = BIAS_DF.astype(str).fillna("-")
 
 
 demo = gr.Blocks(css=custom_css)
@@ -32,68 +33,37 @@ with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🧠 Unified performance evaluation of VLM captioners", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Column():
-                #
-
+                # Add the image display
+                gr.Image("table_snapshot.png", label="Original Table Snapshot", interactive=False)
+
+                # Display the table with multi-level header
                 table_output = gr.DataFrame(value=LEADERBOARD_DF, label="Leaderboard Results", interactive=True, wrap=True)
-
-                gr.Markdown("---")
-
-                # 2. Controls below the table
+
+                gr.Markdown("---")
                 gr.Markdown("### Display Options")
-
-                all_columns_list = LEADERBOARD_DF.columns.tolist()
-                column_selector = gr.CheckboxGroup(
-                    choices=all_columns_list,
-                    value=all_columns_list,  # Initially, all columns are selected
-                    label="Select Columns to Display:"
-                )
-
-                # Assuming the first column is 'Model' for filtering
-                # If leaderboard_data.csv might be empty or have no columns, add checks
-                model_filter_col_name = ""
-                model_filter_choices = []
-                if not LEADERBOARD_DF.empty and LEADERBOARD_DF.columns.any():
-                    model_filter_col_name = LEADERBOARD_DF.columns[0]
-                    model_filter_choices = LEADERBOARD_DF[model_filter_col_name].astype(str).unique().tolist()
 
+                # Removed column_selector
+
+                model_filter_choices = LEADERBOARD_DF.index.tolist()
                 model_selector = gr.CheckboxGroup(
                     choices=model_filter_choices,
-                    value=model_filter_choices,
-                    label=
+                    value=model_filter_choices,
+                    label="Filter by Model types:"
                 )
 
-
-
-
-
-                    # Filter by selected models (from the first column)
-                    if model_filter_col_name and selected_models_from_filter:
-                        temp_df = temp_df[temp_df[model_filter_col_name].isin(selected_models_from_filter)]
-                    elif model_filter_col_name and not selected_models_from_filter: # No models selected, show empty
-                        temp_df = pd.DataFrame(columns=LEADERBOARD_DF.columns)
-
-
-                    # Select display columns
-                    # Ensure selected_cols are valid columns present in the temp_df after filtering
-                    valid_selected_cols = [col for col in selected_cols if col in temp_df.columns]
-                    if not valid_selected_cols and not temp_df.empty: # If all columns are deselected, but df is not empty, show all original columns of filtered
-                        final_df = temp_df
-                    elif not valid_selected_cols and temp_df.empty: # if all columns deselected and df is empty
-                        final_df = pd.DataFrame(columns=selected_cols) # empty df with original column names
+                def update_table(selected_models_from_filter):
+                    # Filter based on index (Model names)
+                    if selected_models_from_filter:
+                        filtered_df = LEADERBOARD_DF.loc[LEADERBOARD_DF.index.isin(selected_models_from_filter)]
                     else:
-
-
-                        return gr.DataFrame.update(value=
+                        # If no models are selected, show an empty DataFrame with the same columns
+                        filtered_df = pd.DataFrame(columns=LEADERBOARD_DF.columns)
+                    return gr.DataFrame.update(value=filtered_df)
 
-                # Event
-                column_selector.change(
-                    fn=update_table,
-                    inputs=[column_selector, model_selector],
-                    outputs=[table_output]
-                )
+                # Event listener (only model_selector now)
                 model_selector.change(
                     fn=update_table,
-                    inputs=[
+                    inputs=[model_selector],
                     outputs=[table_output]
                 )
 
@@ -122,7 +92,7 @@
 
             # Filter by Model (for the bias table)
             bias_model_filter_choices = BIAS_DF["Model"].unique().tolist() if "Model" in BIAS_DF.columns else []
-
+            bias_model_selector_for_bias_tab = gr.CheckboxGroup(
                 choices=bias_model_filter_choices,
                 value=bias_model_filter_choices,
                 label="Filter by Model:"
@@ -157,9 +127,9 @@
 
                 return gr.DataFrame.update(value=final_df)
 
-            bias_column_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector,
-            bias_type_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector,
-
+            bias_column_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector_for_bias_tab], outputs=[bias_table_output])
+            bias_type_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector_for_bias_tab], outputs=[bias_table_output])
+            bias_model_selector_for_bias_tab.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector_for_bias_tab], outputs=[bias_table_output])
 
             # The original gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") is replaced by the table and its controls.
             # If you still want to show LLM_BENCHMARKS_TEXT, you can add it here, e.g.:
```
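After this change, the first tab boils down to one CheckboxGroup driving one DataFrame. Below is a minimal, self-contained sketch of that pattern, not the Space's own code: it uses placeholder leaderboard data, and it returns the frame directly from the callback rather than calling `gr.DataFrame.update(...)`, since that component-level helper is the Gradio 3.x API and was removed in 4.x.

```python
import gradio as gr
import pandas as pd

# Placeholder stand-in for LEADERBOARD_DF (not the real data file):
# model names live in the index, as in the new app.py.
LEADERBOARD_DF = pd.DataFrame(
    {"Score": [0.19, 0.18, 0.67]},
    index=pd.Index(["MiniGPT-4", "InstructBLIP", "LLaVA-1.5"], name="Model"),
)

def update_table(selected_models):
    # Keep only rows whose index (model name) is checked.
    if selected_models:
        return LEADERBOARD_DF.loc[LEADERBOARD_DF.index.isin(selected_models)]
    # Nothing checked: empty frame with the same columns, as in the diff.
    return pd.DataFrame(columns=LEADERBOARD_DF.columns)

with gr.Blocks() as demo:
    table_output = gr.DataFrame(value=LEADERBOARD_DF, label="Leaderboard Results", wrap=True)
    model_selector = gr.CheckboxGroup(
        choices=LEADERBOARD_DF.index.tolist(),
        value=LEADERBOARD_DF.index.tolist(),  # all models shown initially
        label="Filter by Model types:",
    )
    # One event listener is enough once the column selector is gone.
    model_selector.change(fn=update_table, inputs=[model_selector], outputs=[table_output])

demo.launch()
```

Returning an empty frame that keeps the original columns (rather than `None`) mirrors the diff's behavior: the table stays rendered with its headers even when every model is unchecked.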
leaderboard_data.csv
CHANGED
```diff
@@ -1,4 +1,5 @@
-Model,
+Model,Alignment,Alignment,Alignment,Alignment,Descriptiveness,Descriptiveness,Descriptiveness,Descriptiveness,Complexity,Complexity,Complexity,Side effects,Side effects,Side effects,Side effects,Side effects
+Model,CLIP-S,CapS_S,CapS_A,N-avg,Recall,Noun,Verb,N-avg,Syn,Sem,N-avg,CHs↓,FS↑,FSs↑,Harm↓,N-avg↑
 MiniGPT-4,60.8,33.0,35.9,0.19,75.3,33.0,34.7,0.22,8.0,32.6,0.38,37.8,55.0,37.6,0.31,0.18
 InstructBLIP,59.9,36.0,35.5,0.18,82.1,34.2,34.7,0.40,7.7,46.0,0.41,58.5,62.4,43.3,0.10,0.66
 LLaVA-1.5,60.1,38.5,45.0,0.67,80.5,32.5,31.0,0.11,7.1,39.6,0.08,49.0,65.7,41.6,0.12,0.71
```
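The new two-row header is what lets the `header=[0, 1]` call in app.py produce grouped columns. A small sketch of how pandas parses it into a column MultiIndex, using an inline CSV trimmed to a few columns rather than the real file:

```python
import io

import pandas as pd

# Trimmed-down stand-in for leaderboard_data.csv: two header rows,
# with the first column ("Model") doubling as the index.
CSV = """\
Model,Alignment,Alignment,Descriptiveness,Descriptiveness
Model,CLIP-S,N-avg,Recall,N-avg
MiniGPT-4,60.8,0.19,75.3,0.22
InstructBLIP,59.9,0.18,82.1,0.40
LLaVA-1.5,60.1,0.67,80.5,0.11
"""

# Same call as the new app.py: both header rows become column levels.
df = pd.read_csv(io.StringIO(CSV), header=[0, 1], index_col=0)
print(df.columns.tolist())
# [('Alignment', 'CLIP-S'), ('Alignment', 'N-avg'),
#  ('Descriptiveness', 'Recall'), ('Descriptiveness', 'N-avg')]

# Group-level selection, and the index filter used by update_table:
print(df["Alignment"])                       # both Alignment sub-columns
print(df.loc[df.index.isin(["LLaVA-1.5"])])  # single-model row
```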