import html
import re

import gradio as gr
import numpy as np
import pandas as pd

from chat import (
    format_chat_display,
    format_metrics_display,
    format_tool_info,
)
from data_loader import MODELS, DATASETS, SCORES, HEADER_CONTENT
def get_updated_df(df, df_output):
    """Attach model outputs to the leading rows of a dataset frame.

    The dataset frame is cut down to its first ``len(df_output)`` rows (as a
    copy, so the caller's frame is left untouched), the four output columns
    are copied across, and only the columns used by the explorer are returned.

    Args:
        df: Dataset frame providing ``conversation``, ``tools_langchain``,
            ``n_turns``, ``len_query`` and ``n_tools``.
        df_output: Model-output frame providing ``response``, ``rationale``,
            ``explanation`` and ``score``.

    Returns:
        A new DataFrame holding the nine explorer columns, in a fixed order.
    """
    row_count = len(df_output)
    merged = df.iloc[:row_count].copy()

    # Values are transferred as plain lists, i.e. matched by position rather
    # than by index label.
    for output_col in ("response", "rationale", "explanation", "score"):
        merged[output_col] = df_output[output_col].tolist()

    selected = [
        "conversation",
        "tools_langchain",
        "n_turns",
        "len_query",
        "n_tools",
        "response",
        "rationale",
        "explanation",
        "score",
    ]
    return merged[selected]
def get_chat_and_score_df(model, dataset):
    """Load a dataset together with one model's outputs for it.

    Reads ``output/{model}/{dataset}.parquet`` and
    ``datasets/{dataset}.parquet`` (both relative paths) and combines them
    with ``get_updated_df``.

    Args:
        model: Model name, used as the sub-directory under ``output/``.
        dataset: Dataset name, used as the parquet file stem in both paths.

    Returns:
        The combined DataFrame produced by ``get_updated_df``.
    """
    output_path = f"output/{model}/{dataset}.parquet"
    dataset_path = f"datasets/{dataset}.parquet"

    model_outputs = pd.read_parquet(output_path)
    dataset_rows = pd.read_parquet(dataset_path)
    return get_updated_df(dataset_rows, model_outputs)
def on_filter_change(
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
):
    """Re-render the explorer at the first matching record after a filter change.

    All arguments are the current values of the filter controls and are
    forwarded unchanged to ``filter_and_display`` with index ``0``.

    Returns:
        A 4-tuple ``(chat_html, metrics_html, tool_html, index_html)``. If
        rendering fails, the first element carries the (HTML-escaped) error
        message and the remaining three are fixed placeholder strings.
    """
    try:
        # Unpacking (rather than returning the call result directly) guarantees
        # that exactly four values reach the four bound output components.
        chat_html, metrics_html, tool_html, index_html = filter_and_display(
            model,
            dataset,
            min_score,
            max_score,
            min_n_turns,
            min_len_query,
            min_n_tools,
            0,
        )
        return chat_html, metrics_html, tool_html, index_html
    except Exception as exc:
        # UI boundary: show the failure instead of raising into the event loop.
        # The message is escaped because it is rendered as raw HTML.
        error_html = f"\nFilter Error\n{html.escape(str(exc))}\n"
        return (
            error_html,
            "No metrics available\n",
            "No tool information available\n",
            "0/0\n",
        )
def navigate_prev(
    current_idx,
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
):
    """Show the record before ``current_idx`` under the current filters.

    Args:
        current_idx: Stored position; an int, ``None`` (treated as 0), or a
            ``{"value": ...}`` dict wrapping the position.
        model, dataset, min_score, max_score, min_n_turns, min_len_query,
        min_n_tools: Current filter values, forwarded to
            ``filter_and_display``.

    Returns:
        A 5-tuple ``(chat_html, metrics_html, tool_html, index_html,
        new_index)``. On failure the first element is an error panel, the
        next three are placeholders and the last is ``current_idx or 0``.
    """
    try:
        if isinstance(current_idx, dict) and "value" in current_idx:
            idx_val = int(current_idx["value"])
        else:
            idx_val = int(current_idx) if current_idx is not None else 0

        new_index = max(0, idx_val - 1)
        chat_html, metrics_html, tool_html, index_html = filter_and_display(
            model,
            dataset,
            min_score,
            max_score,
            min_n_turns,
            min_len_query,
            min_n_tools,
            new_index,
        )

        # filter_and_display clamps the index to the filtered row count but
        # does not return it. Recover the displayed 1-based position from the
        # "<pos>/<total>" text so the stored index cannot drift out of range.
        # NOTE(review): assumes the first "digits/digits" run in index_html is
        # the position counter; falls back to the requested index otherwise.
        shown = re.search(r"(\d+)\s*/\s*\d+", str(index_html))
        if shown:
            new_index = max(0, int(shown.group(1)) - 1)

        return chat_html, metrics_html, tool_html, index_html, new_index
    except Exception as exc:
        # UI boundary: report the failure in the chat panel. The message is
        # escaped because it is rendered as raw HTML.
        error_html = f"\nNavigation Error\n{html.escape(str(exc))}\n"
        return (
            error_html,
            "No metrics available\n",
            "No tool information available\n",
            "0/0\n",
            current_idx or 0,
        )
def navigate_next(
    current_idx,
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
):
    """Show the record after ``current_idx`` under the current filters.

    Args:
        current_idx: Stored position; an int, ``None`` (treated as 0), or a
            ``{"value": ...}`` dict wrapping the position.
        model, dataset, min_score, max_score, min_n_turns, min_len_query,
        min_n_tools: Current filter values, forwarded to
            ``filter_and_display``.

    Returns:
        A 5-tuple ``(chat_html, metrics_html, tool_html, index_html,
        new_index)``. On failure the first element is an error panel, the
        next three are placeholders and the last is ``current_idx or 0``.
    """
    try:
        if isinstance(current_idx, dict) and "value" in current_idx:
            idx_val = int(current_idx["value"])
        else:
            idx_val = int(current_idx) if current_idx is not None else 0

        new_index = idx_val + 1
        chat_html, metrics_html, tool_html, index_html = filter_and_display(
            model,
            dataset,
            min_score,
            max_score,
            min_n_turns,
            min_len_query,
            min_n_tools,
            new_index,
        )

        # filter_and_display clamps the index to the last filtered row, so
        # without a resync every "Next" click on the final record would push
        # the stored index further out of range and "Previous" would need as
        # many clicks to come back. Recover the displayed 1-based position
        # from the "<pos>/<total>" text instead.
        # NOTE(review): assumes the first "digits/digits" run in index_html is
        # the position counter; falls back to the requested index otherwise.
        shown = re.search(r"(\d+)\s*/\s*\d+", str(index_html))
        if shown:
            new_index = max(0, int(shown.group(1)) - 1)

        return chat_html, metrics_html, tool_html, index_html, new_index
    except Exception as exc:
        # UI boundary: report the failure in the chat panel. The message is
        # escaped because it is rendered as raw HTML.
        error_html = f"\nNavigation Error\n{html.escape(str(exc))}\n"
        return (
            error_html,
            "No metrics available\n",
            "No tool information available\n",
            "0/0\n",
            current_idx or 0,
        )
def _coerce_control(raw, cast, fallback):
    """Unwrap a control value and cast it, using ``fallback()`` when absent.

    ``raw`` may be a plain value, ``None``, or a ``{"value": ...}`` dict. The
    fallback is a zero-argument callable so defaults that need computing
    (e.g. a score bound) are only evaluated when actually required.
    """
    if isinstance(raw, dict):
        raw = raw.get("value")
    return fallback() if raw is None else cast(raw)


def filter_and_display(
    model,
    dataset,
    min_score,
    max_score,
    min_n_turns,
    min_len_query,
    min_n_tools,
    index=0,
):
    """Filter the selected model/dataset rows and render one of them.

    Every argument may be a plain value, ``None``, or a ``{"value": ...}``
    dict. Missing values fall back to the first model/dataset, the extremes
    of ``SCORES``, or ``0``. An ``index`` that cannot be converted to an int
    is treated as ``0``, and out-of-range indices are clamped to the filtered
    rows.

    Returns:
        A 4-tuple ``(chat_html, metrics_html, tool_html, index_html)``.
        When no row matches, the first three elements are a "no results"
        message and ``index_html`` is ``"0/0\\n"``. When anything fails, the
        first element carries the (HTML-escaped) error message and the other
        three are fixed placeholder strings.
    """
    try:
        model_str = _coerce_control(model, str, lambda: MODELS[0])
        dataset_str = _coerce_control(dataset, str, lambda: DATASETS[0])
        min_score_val = _coerce_control(
            min_score, float, lambda: float(min(SCORES))
        )
        max_score_val = _coerce_control(
            max_score, float, lambda: float(max(SCORES))
        )
        min_n_turns_val = _coerce_control(min_n_turns, int, lambda: 0)
        min_len_query_val = _coerce_control(min_len_query, int, lambda: 0)
        min_n_tools_val = _coerce_control(min_n_tools, int, lambda: 0)

        # Unlike the filters, a malformed index is not an error: start over
        # from the first record.
        try:
            index_val = _coerce_control(index, int, lambda: 0)
        except (ValueError, TypeError):
            index_val = 0

        df_chat = get_chat_and_score_df(model_str, dataset_str)

        # Make every filter column present and numeric so the comparisons
        # below cannot fail on missing or non-numeric data.
        for col, default in (
            ("score", 0.0),
            ("n_turns", 0),
            ("len_query", 0),
            ("n_tools", 0),
        ):
            if col in df_chat.columns:
                df_chat[col] = pd.to_numeric(
                    df_chat[col], errors="coerce"
                ).fillna(default)
            else:
                df_chat[col] = default

        keep = (
            (df_chat["score"] >= min_score_val)
            & (df_chat["score"] <= max_score_val)
            & (df_chat["n_turns"] >= min_n_turns_val)
            & (df_chat["len_query"] >= min_len_query_val)
            & (df_chat["n_tools"] >= min_n_tools_val)
        )
        df_filtered = df_chat[keep]

        if df_filtered.empty:
            empty_message = (
                "\n📭\nNo Results Found\n"
                "Try adjusting your filters to see more data\n"
            )
            return (
                empty_message,
                empty_message,
                empty_message,
                "0/0\n",
            )

        # Clamp the requested position into the filtered rows.
        max_index = len(df_filtered) - 1
        valid_index = max(0, min(index_val, max_index))
        row = df_filtered.iloc[valid_index]

        chat_html = format_chat_display(row)
        metrics_html = format_metrics_display(row)

        # A failure in the tool panel must not take down the other panels.
        try:
            tool_html = format_tool_info(row["tools_langchain"])
        except Exception as tool_exc:
            tool_html = (
                "\nTool Information Unavailable\n"
                f"Error: {html.escape(str(tool_exc))}\n"
            )

        index_html = f"\n📄{valid_index + 1}/{len(df_filtered)}\n"
        return chat_html, metrics_html, tool_html, index_html
    except Exception as exc:
        # UI boundary: show the failure instead of raising into the event
        # loop. The message is escaped because it is rendered as raw HTML.
        error_html = f"\nError: {html.escape(str(exc))}\n"
        return (
            error_html,
            "No metrics available\n",
            "No tool information available\n",
            "0/0\n",
        )
def create_exploration_tab(df):
    """Build the "Data Exploration" tab and wire its filter/navigation events.

    The tab holds model/dataset dropdowns, score and size sliders, a reset
    button, previous/next buttons with a position indicator, and three HTML
    panels (chat, metrics, tool info). Slider ranges are taken from the data
    of the default model/dataset and refreshed whenever either dropdown
    changes.

    Args:
        df: Not used by this function; kept so existing callers keep working.

    Returns:
        ``[chat_display, metrics_display, tool_info_display, index_display]``
        — the four ``gr.HTML`` components that the event handlers update.
    """

    def slider_maxima(df_chat):
        """Return ``(n_turns, len_query, n_tools)`` slider maxima for a frame.

        Columns are coerced to numbers first; each maximum has a floor (1, 10
        and 1) so a slider never ends up with ``minimum == maximum``.
        """

        def column_max(name, floor):
            values = pd.to_numeric(df_chat[name], errors="coerce").fillna(0)
            peak = values.max()
            # An empty frame yields NaN, which int() cannot convert.
            return floor if pd.isna(peak) else max(floor, int(peak))

        return (
            column_max("n_turns", 1),
            column_max("len_query", 10),
            column_max("n_tools", 1),
        )

    score_lo = float(min(SCORES))
    score_hi = float(max(SCORES))

    with gr.Tab("Data Exploration"):
        # CSS styling
        gr.HTML("\n")

        # Header
        with gr.Row(elem_id="exploration-header"):
            gr.HTML(HEADER_CONTENT)

        # Filters section
        with gr.Column(elem_classes="filter-container"):
            gr.Markdown("### 🔍 Filter Options")

            with gr.Row(equal_height=True, elem_classes="filter-row"):
                explore_model = gr.Dropdown(
                    choices=MODELS,
                    value=MODELS[0],
                    label="Model",
                    container=True,
                    scale=1,
                    info="Select AI model",
                )
                explore_dataset = gr.Dropdown(
                    choices=DATASETS,
                    value=DATASETS[0],
                    label="Dataset",
                    container=True,
                    scale=1,
                    info="Select evaluation dataset",
                )

            with gr.Row(equal_height=True, elem_classes="filter-row"):
                min_score = gr.Slider(
                    minimum=score_lo,
                    maximum=score_hi,
                    value=score_lo,
                    step=0.1,
                    label="Minimum TSQ Score",
                    container=True,
                    scale=1,
                    info="Filter responses with scores above this threshold",
                )
                max_score = gr.Slider(
                    minimum=score_lo,
                    maximum=score_hi,
                    value=score_hi,
                    step=0.1,
                    label="Maximum TSQ Score",
                    container=True,
                    scale=1,
                    info="Filter responses with scores below this threshold",
                )

            # Load the default model/dataset once; it supplies both the
            # initial slider ranges and the initial record count below.
            df_chat = get_chat_and_score_df(
                explore_model.value, explore_dataset.value
            )
            n_turns_max, len_query_max, n_tools_max = slider_maxima(df_chat)

            with gr.Row(equal_height=True, elem_classes="filter-row"):
                n_turns_filter = gr.Slider(
                    minimum=0,
                    maximum=n_turns_max,
                    value=0,
                    step=1,
                    label="Minimum Turn Count",
                    container=True,
                    scale=1,
                    info="Filter by minimum number of conversation turns",
                )
                len_query_filter = gr.Slider(
                    minimum=0,
                    maximum=len_query_max,
                    value=0,
                    step=10,
                    label="Minimum Query Length",
                    container=True,
                    scale=1,
                    info="Filter by minimum length of query in characters",
                )
                n_tools_filter = gr.Slider(
                    minimum=0,
                    maximum=n_tools_max,
                    value=0,
                    step=1,
                    label="Minimum Tool Count",
                    container=True,
                    scale=1,
                    info="Filter by minimum number of tools used",
                )

            with gr.Row():
                reset_btn = gr.Button(
                    "Reset Filters", size="sm", variant="secondary"
                )

        # Navigation row
        with gr.Row(variant="panel"):
            with gr.Column(scale=1):
                prev_btn = gr.Button(
                    "← Previous",
                    size="lg",
                    variant="secondary",
                    elem_classes="navigation-buttons",
                )
            with gr.Column(scale=1, min_width=100):
                initial_count = len(df_chat)
                index_display = gr.HTML(
                    value=f"\n📄1/{initial_count}\n",
                    elem_id="index-display",
                )
            with gr.Column(scale=1):
                next_btn = gr.Button(
                    "Next →",
                    size="lg",
                    variant="secondary",
                    elem_classes="navigation-buttons",
                )

        # Content areas
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                chat_display = gr.HTML()
            with gr.Column(scale=1):
                metrics_display = gr.HTML()
        with gr.Row():
            tool_info_display = gr.HTML()

        # Position of the record currently shown (plain integer state).
        current_index = gr.State(value=0)

        filter_controls = [
            explore_model,
            explore_dataset,
            min_score,
            max_score,
            n_turns_filter,
            len_query_filter,
            n_tools_filter,
        ]
        display_outputs = [
            chat_display,
            metrics_display,
            tool_info_display,
            index_display,
        ]

        def reset_index():
            """Reset the current index to 0."""
            return 0

        # Any filter change sends the position back to the first record.
        for control in filter_controls:
            control.change(
                reset_index,
                inputs=[],
                outputs=[current_index],
            )

        def reset_filters():
            """Return the default value for every filter control, in order."""
            return (
                MODELS[0],
                DATASETS[0],
                score_lo,
                score_hi,
                0,  # n_turns
                0,  # len_query
                0,  # n_tools
            )

        reset_btn.click(
            reset_filters,
            outputs=list(filter_controls),
        )

        # Any filter change re-renders the panels from the first match.
        for control in filter_controls:
            control.change(
                on_filter_change,
                inputs=list(filter_controls),
                outputs=list(display_outputs),
            )

        # Navigation needs the stored position plus every filter value, and
        # writes the new position back alongside the panels.
        prev_btn.click(
            navigate_prev,
            inputs=[current_index, *filter_controls],
            outputs=[*display_outputs, current_index],
        )
        next_btn.click(
            navigate_next,
            inputs=[current_index, *filter_controls],
            outputs=[*display_outputs, current_index],
        )

        def update_slider_ranges(model, dataset):
            """Recompute slider maxima for a model/dataset and zero the sliders."""
            turns_max, query_max, tools_max = slider_maxima(
                get_chat_and_score_df(model, dataset)
            )
            return (
                gr.update(maximum=turns_max, value=0),
                gr.update(maximum=query_max, value=0),
                gr.update(maximum=tools_max, value=0),
            )

        # Slider ranges depend on the data, so refresh them whenever the
        # model or the dataset changes.
        for selector in (explore_model, explore_dataset):
            selector.change(
                update_slider_ranges,
                inputs=[explore_model, explore_dataset],
                outputs=[n_turns_filter, len_query_filter, n_tools_filter],
            )

    return [
        chat_display,
        metrics_display,
        tool_info_display,
        index_display,
    ]