"""Gradio "Data Exploration" tab: filter scored conversations and page through them."""

import gradio as gr
import pandas as pd
import numpy as np

from data_loader import MODELS, DATASETS, SCORES, HEADER_CONTENT
from chat import (
    format_chat_display,
    format_metrics_display,
    format_tool_info,
)

# Columns exposed to the UI, in display order.
_OUTPUT_COLUMNS = [
    "conversation",
    "tools_langchain",
    "n_turns",
    "len_query",
    "n_tools",
    "response",
    "rationale",
    "explanation",
    "score",
]

# Numeric columns the filters compare against, with the value used when a cell
# cannot be parsed as a number.
_FILTER_COLUMN_DEFAULTS = (
    ("score", 0.0),
    ("n_turns", 0),
    ("len_query", 0),
    ("n_tools", 0),
)

# Slider columns paired with the smallest maximum a slider may get, so that a
# slider never collapses to a 0..0 range.
_SLIDER_FLOORS = (("n_turns", 1), ("len_query", 10), ("n_tools", 1))

# NOTE(review): the fragments below contain only text and newlines; whatever
# HTML markup they were meant to carry is not present in this file. Confirm the
# intended markup before relying on how they render.
_NO_METRICS_HTML = "\n\nNo metrics available\n\n"
_NO_TOOL_INFO_HTML = "\n\nNo tool information available\n\n"
_EMPTY_INDEX_HTML = "\n\n0/0\n\n"
_NO_RESULTS_HTML = (
    "\n\n📭\n\nNo Results Found\n\n"
    "Try adjusting your filters to see more data\n\n"
)


def get_updated_df(df, df_output):
    """Attach model outputs to the dataset rows and keep the UI columns.

    Args:
        df: Dataset frame; only its first ``len(df_output)`` rows are used.
        df_output: Frame with ``response``, ``rationale``, ``explanation`` and
            ``score`` columns, aligned to ``df`` by position.

    Returns:
        A new frame restricted to ``_OUTPUT_COLUMNS``.
    """
    df = df.iloc[: len(df_output)].copy()
    # Copy by position (via lists) so differing indexes cannot misalign rows.
    for col in ("response", "rationale", "explanation", "score"):
        df[col] = df_output[col].tolist()
    return df[_OUTPUT_COLUMNS]


def get_chat_and_score_df(model, dataset):
    """Load the dataset and the given model's outputs for it, merged.

    Reads ``output/{model}/{dataset}.parquet`` and
    ``datasets/{dataset}.parquet`` relative to the working directory.
    """
    df_output = pd.read_parquet(f"output/{model}/{dataset}.parquet")
    df = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(df, df_output)


def _unwrap(value, cast, default):
    """Coerce a raw component value with ``cast``.

    Values may arrive either bare or as a ``{"value": ...}`` dict. ``None`` and
    dicts without a "value" key yield ``default``; anything else is passed to
    ``cast``, whose exceptions propagate to the caller.
    """
    if isinstance(value, dict):
        if "value" not in value:
            return default
        value = value["value"]
    elif value is None:
        return default
    return cast(value)


def _coerce_index(index):
    """Return ``index`` as an int, falling back to 0 when it is unusable."""
    try:
        return _unwrap(index, int, 0)
    except (ValueError, TypeError):
        return 0


def _error_block(heading, exc, indent):
    """Build the error fragment shown in the chat panel.

    ``indent`` is the number of spaces placed before the message; the line
    after the message carries four fewer.
    """
    pad = " " * indent
    return f"\n\n{heading}\n\n{pad}{exc!s}\n{pad[:-4]}\n\n"


def _error_outputs(error_html):
    """Return the four display values used when nothing can be rendered."""
    return error_html, _NO_METRICS_HTML, _NO_TOOL_INFO_HTML, _EMPTY_INDEX_HTML


def _slider_maxima(df_chat):
    """Return the (n_turns, len_query, n_tools) slider maxima for ``df_chat``.

    Each column is coerced to numeric first; an empty or all-NaN column falls
    back to the floor from ``_SLIDER_FLOORS`` instead of raising on ``int(nan)``.
    """
    maxima = []
    for col, floor in _SLIDER_FLOORS:
        peak = pd.to_numeric(df_chat[col], errors="coerce").fillna(0).max()
        maxima.append(max(floor, int(peak)) if pd.notna(peak) else floor)
    return tuple(maxima)


def _filter_and_render(
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
    index,
):
    """Filter the data and render the row at ``index``.

    Unlike ``filter_and_display`` this lets exceptions propagate and also
    reports which row was actually shown.

    Returns:
        ``(chat_html, metrics_html, tool_html, index_html, valid_index)`` where
        ``valid_index`` is ``index`` clamped into the filtered frame (0 when
        the filters match nothing).
    """
    model_str = _unwrap(model, str, MODELS[0])
    dataset_str = _unwrap(dataset, str, DATASETS[0])
    min_score_val = _unwrap(min_score, float, float(min(SCORES)))
    max_score_val = _unwrap(max_score, float, float(max(SCORES)))
    min_n_turns_val = _unwrap(min_n_turns, int, 0)
    min_len_query_val = _unwrap(min_len_query, int, 0)
    min_n_tools_val = _unwrap(min_n_tools, int, 0)

    df_chat = get_chat_and_score_df(model_str, dataset_str)

    # Make sure every filter column exists and is numeric before comparing.
    for col, default in _FILTER_COLUMN_DEFAULTS:
        if col in df_chat.columns:
            df_chat[col] = pd.to_numeric(df_chat[col], errors="coerce").fillna(
                default
            )
        else:
            df_chat[col] = default

    keep = (
        (df_chat["score"] >= min_score_val)
        & (df_chat["score"] <= max_score_val)
        & (df_chat["n_turns"] >= min_n_turns_val)
        & (df_chat["len_query"] >= min_len_query_val)
        & (df_chat["n_tools"] >= min_n_tools_val)
    )
    df_filtered = df_chat[keep]

    if df_filtered.empty:
        return (
            _NO_RESULTS_HTML,
            _NO_RESULTS_HTML,
            _NO_RESULTS_HTML,
            _EMPTY_INDEX_HTML,
            0,
        )

    valid_index = max(0, min(index, len(df_filtered) - 1))
    row = df_filtered.iloc[valid_index]

    chat_html = format_chat_display(row)
    metrics_html = format_metrics_display(row)

    # Tool rendering is best-effort: a failure must not hide the conversation.
    try:
        tool_html = format_tool_info(row["tools_langchain"])
    except Exception as e:
        tool_html = f"\n\nTool Information Unavailable\n\nError: {e!s}\n\n"

    index_html = f"\n\n📄{valid_index + 1}/{len(df_filtered)}\n\n"
    return chat_html, metrics_html, tool_html, index_html, valid_index


def _display_at(
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
    index,
):
    """Never-raising wrapper around ``_filter_and_render``.

    Returns the same five values; on failure the first four are the error
    placeholders and the fifth is the requested (non-negative) index.
    """
    requested = 0
    try:
        requested = max(0, _coerce_index(index))
        return _filter_and_render(
            model,
            dataset,
            min_score,
            max_score,
            min_n_turns,
            min_len_query,
            min_n_tools,
            requested,
        )
    except Exception as e:  # UI boundary: show the failure instead of crashing
        error_html = _error_block("⚠️\n\nError Occurred", e, 20)
        return (*_error_outputs(error_html), requested)


def _navigate(step, current_idx, filters):
    """Move ``step`` rows from ``current_idx`` and render the target row.

    Args:
        step: -1 for previous, +1 for next.
        current_idx: Current index, bare or as a ``{"value": ...}`` dict.
        filters: The seven filter values in ``filter_and_display`` order.

    Returns:
        ``(chat_html, metrics_html, tool_html, index_html, new_index)``.
        ``new_index`` is the index that was actually displayed, so the stored
        state can never run past the last filtered row.
    """
    try:
        target = max(0, _unwrap(current_idx, int, 0) + step)
        return _display_at(*filters, target)
    except Exception as e:  # UI boundary, e.g. a non-numeric index
        error_html = _error_block("Navigation Error", e, 16)
        # Only write an int back into the state; anything else resets to 0.
        fallback = current_idx if isinstance(current_idx, int) else 0
        return (*_error_outputs(error_html), fallback)


def on_filter_change(
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
):
    """Re-render the first matching row after any filter control changes.

    Returns:
        ``(chat_html, metrics_html, tool_html, index_html)``.
    """
    try:
        return filter_and_display(
            model,
            dataset,
            min_score,
            max_score,
            min_n_turns,
            min_len_query,
            min_n_tools,
            0,
        )
    except Exception as e:
        return _error_outputs(_error_block("Filter Error", e, 16))


def navigate_prev(
    current_idx,
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
):
    """Show the row before ``current_idx`` (stopping at the first row).

    Returns:
        ``(chat_html, metrics_html, tool_html, index_html, new_index)``.
    """
    filters = (
        model,
        dataset,
        min_score,
        max_score,
        min_n_turns,
        min_len_query,
        min_n_tools,
    )
    return _navigate(-1, current_idx, filters)


def navigate_next(
    current_idx,
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
):
    """Show the row after ``current_idx`` (stopping at the last row).

    Returns:
        ``(chat_html, metrics_html, tool_html, index_html, new_index)``.
    """
    filters = (
        model,
        dataset,
        min_score,
        max_score,
        min_n_turns,
        min_len_query,
        min_n_tools,
    )
    return _navigate(1, current_idx, filters)


def filter_and_display(
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
    index=0,
):
    """Combined function to filter data and update display.

    Every argument may be a bare value, ``None`` (use the default) or a
    ``{"value": ...}`` dict. ``index`` is clamped into the filtered rows.

    Returns:
        ``(chat_html, metrics_html, tool_html, index_html)``. Errors are
        reported inside the returned HTML rather than raised.
    """
    return _display_at(
        model,
        dataset,
        min_score,
        max_score,
        min_n_turns,
        min_len_query,
        min_n_tools,
        index,
    )[:4]


def create_exploration_tab(df):
    """Create an enhanced data exploration tab with better UI and functionality.

    Args:
        df: Unused here; kept so existing callers keep working.

    Returns:
        ``[chat_display, metrics_display, tool_info_display, index_display]``.
    """
    with gr.Tab("Data Exploration"):
        # NOTE(review): this placeholder holds a single space; the CSS it was
        # presumably meant to carry is not present in this file.
        gr.HTML(" ")

        # Header
        with gr.Row(elem_id="exploration-header"):
            gr.HTML(HEADER_CONTENT)

        # Filters section
        with gr.Column(elem_classes="filter-container"):
            gr.Markdown("### 🔍 Filter Options")

            with gr.Row(equal_height=True, elem_classes="filter-row"):
                explore_model = gr.Dropdown(
                    choices=MODELS,
                    value=MODELS[0],
                    label="Model",
                    container=True,
                    scale=1,
                    info="Select AI model",
                )
                explore_dataset = gr.Dropdown(
                    choices=DATASETS,
                    value=DATASETS[0],
                    label="Dataset",
                    container=True,
                    scale=1,
                    info="Select evaluation dataset",
                )

            with gr.Row(equal_height=True, elem_classes="filter-row"):
                min_score = gr.Slider(
                    minimum=float(min(SCORES)),
                    maximum=float(max(SCORES)),
                    value=float(min(SCORES)),
                    step=0.1,
                    label="Minimum TSQ Score",
                    container=True,
                    scale=1,
                    info="Filter responses with scores above this threshold",
                )
                max_score = gr.Slider(
                    minimum=float(min(SCORES)),
                    maximum=float(max(SCORES)),
                    value=float(max(SCORES)),
                    step=0.1,
                    label="Maximum TSQ Score",
                    container=True,
                    scale=1,
                    info="Filter responses with scores below this threshold",
                )

            # One load of the default model/dataset feeds both the initial
            # slider ranges and the initial "1/N" counter below.
            df_chat = get_chat_and_score_df(
                explore_model.value, explore_dataset.value
            )
            initial_count = len(df_chat)
            n_turns_max, len_query_max, n_tools_max = _slider_maxima(df_chat)

            with gr.Row(equal_height=True, elem_classes="filter-row"):
                n_turns_filter = gr.Slider(
                    minimum=0,
                    maximum=n_turns_max,
                    value=0,
                    step=1,
                    label="Minimum Turn Count",
                    container=True,
                    scale=1,
                    info="Filter by minimum number of conversation turns",
                )
                len_query_filter = gr.Slider(
                    minimum=0,
                    maximum=len_query_max,
                    value=0,
                    step=10,
                    label="Minimum Query Length",
                    container=True,
                    scale=1,
                    info="Filter by minimum length of query in characters",
                )
                n_tools_filter = gr.Slider(
                    minimum=0,
                    maximum=n_tools_max,
                    value=0,
                    step=1,
                    label="Minimum Tool Count",
                    container=True,
                    scale=1,
                    info="Filter by minimum number of tools used",
                )

            with gr.Row():
                reset_btn = gr.Button(
                    "Reset Filters", size="sm", variant="secondary"
                )

        # Navigation row
        with gr.Row(variant="panel"):
            with gr.Column(scale=1):
                prev_btn = gr.Button(
                    "← Previous",
                    size="lg",
                    variant="secondary",
                    elem_classes="navigation-buttons",
                )
            with gr.Column(scale=1, min_width=100):
                index_display = gr.HTML(
                    value=f"\n\n📄1/{initial_count}\n\n",
                    elem_id="index-display",
                )
            with gr.Column(scale=1):
                next_btn = gr.Button(
                    "Next →",
                    size="lg",
                    variant="secondary",
                    elem_classes="navigation-buttons",
                )

        # Content areas
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                chat_display = gr.HTML()
            with gr.Column(scale=1):
                metrics_display = gr.HTML()

        with gr.Row():
            tool_info_display = gr.HTML()

        # State for tracking current index (simple integer state)
        current_index = gr.State(value=0)

        filter_controls = [
            explore_model,
            explore_dataset,
            min_score,
            max_score,
            n_turns_filter,
            len_query_filter,
            n_tools_filter,
        ]
        display_outputs = [
            chat_display,
            metrics_display,
            tool_info_display,
            index_display,
        ]

        def reset_index():
            """Reset the current index to 0"""
            return 0

        # Any filter change invalidates the current position.
        for control in filter_controls:
            control.change(
                reset_index,
                inputs=[],
                outputs=[current_index],
            )

        def reset_filters():
            """Return the default value of every filter control, in order."""
            return (
                MODELS[0],
                DATASETS[0],
                float(min(SCORES)),
                float(max(SCORES)),
                0,  # n_turns
                0,  # len_query
                0,  # n_tools
            )

        reset_btn.click(
            reset_filters,
            outputs=filter_controls,
        )

        # Re-render the first matching row whenever a filter changes.
        for control in filter_controls:
            control.change(
                on_filter_change,
                inputs=filter_controls,
                outputs=display_outputs,
            )

        # Navigation needs the filter values to know which rows are in play,
        # and writes the displayed index back into the state.
        prev_btn.click(
            navigate_prev,
            inputs=[current_index, *filter_controls],
            outputs=[*display_outputs, current_index],
        )
        next_btn.click(
            navigate_next,
            inputs=[current_index, *filter_controls],
            outputs=[*display_outputs, current_index],
        )

        def update_slider_ranges(model, dataset):
            """Rescale the three count sliders to the selected data."""
            n_turns_cap, len_query_cap, n_tools_cap = _slider_maxima(
                get_chat_and_score_df(model, dataset)
            )
            return (
                gr.update(maximum=n_turns_cap, value=0),
                gr.update(maximum=len_query_cap, value=0),
                gr.update(maximum=n_tools_cap, value=0),
            )

        # Model and dataset changes also move the slider ranges.
        for selector in (explore_model, explore_dataset):
            selector.change(
                update_slider_ranges,
                inputs=[explore_model, explore_dataset],
                outputs=[n_turns_filter, len_query_filter, n_tools_filter],
            )

    return [
        chat_display,
        metrics_display,
        tool_info_display,
        index_display,
    ]