# Source: Hugging Face Space file view ("add gpu layers", commit a068a3a, 12.4 kB)
# — page chrome from the scrape converted to a comment so the module parses.
import streamlit as st
import plotly.express as px
import pandas as pd
from typing import Optional, Dict, List, Set
def create_performance_plot(
    df: pd.DataFrame,
    metric: str,
    title: str,
    hover_data: Optional[List[str]] = None,
):
    """Create a grouped bar chart comparing a performance metric across devices.

    Args:
        df: Aggregated benchmark data; must contain "Device", "Platform",
            the requested metric column, and any hover columns.
        metric: Name of the column plotted on the y-axis.
        title: Chart title.
        hover_data: Extra columns to show on hover. Defaults to
            ["CPU Cores", "Peak Memory (GB)"].

    Returns:
        A plotly Figure, or None when df is empty.
    """
    if df.empty:
        return None
    if hover_data is None:
        # Default mutable argument built per-call, never in the signature.
        hover_data = ["CPU Cores", "Peak Memory (GB)"]
    fig = px.bar(
        df,
        x="Device",
        y=metric,
        color="Platform",
        title=title,
        template="plotly_white",
        barmode="group",
        hover_data=hover_data,
    )
    fig.update_layout(
        xaxis_title="Device",
        yaxis_title="Token/sec",
        legend_title="Platform",
        plot_bgcolor="white",
        height=400,
    )
    return fig
def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
"""Apply all filters to the dataframe"""
if df.empty:
return df
filtered_df = df.copy()
# Basic filters
if filters["model"] != "All":
filtered_df = filtered_df[filtered_df["Model ID"] == filters["model"]]
if filters["platform"] != "All":
filtered_df = filtered_df[filtered_df["Platform"] == filters["platform"]]
if filters["device"] != "All":
filtered_df = filtered_df[filtered_df["Device"] == filters["device"]]
# Flash Attention filter
if filters["flash_attn"] != "All":
filtered_df = filtered_df[filtered_df["flash_attn"] == filters["flash_attn"]]
# Cache Type filters
if filters["cache_type_k"] != "All":
filtered_df = filtered_df[
filtered_df["cache_type_k"] == filters["cache_type_k"]
]
if filters["cache_type_v"] != "All":
filtered_df = filtered_df[
filtered_df["cache_type_v"] == filters["cache_type_v"]
]
# Range filters
pp_min, pp_max = filters["pp_range"]
if pp_min is not None and pp_max is not None:
pp_values = filtered_df["PP Config"]
filtered_df = filtered_df[(pp_values >= pp_min) & (pp_values <= pp_max)]
tg_min, tg_max = filters["tg_range"]
if tg_min is not None and tg_max is not None:
tg_values = filtered_df["TG Config"]
filtered_df = filtered_df[(tg_values >= tg_min) & (tg_values <= tg_max)]
n_threads_min, n_threads_max = filters["n_threads"]
if n_threads_min is not None and n_threads_max is not None:
n_threads = filtered_df["n_threads"]
filtered_df = filtered_df[
(n_threads >= n_threads_min) & (n_threads <= n_threads_max)
]
n_gpu_layers_min, n_gpu_layers_max = filters["n_gpu_layers"]
if n_gpu_layers_min is not None and n_gpu_layers_max is not None:
n_gpu_layers = filtered_df["n_gpu_layers"]
filtered_df = filtered_df[
(n_gpu_layers >= n_gpu_layers_min) & (n_gpu_layers <= n_gpu_layers_max)
]
# Version filter - handle multiple selections
if filters.get("Version") != "All" and filters.get("Version"):
filtered_df = filtered_df[filtered_df["Version"] == filters["Version"]]
return filtered_df
def render_performance_plots(df: pd.DataFrame, filters: Dict):
    """Render side-by-side Prompt Processing / Token Generation bar charts.

    Applies the UI filters, averages each metric per (Device, Platform)
    group, and draws two plotly charts in Streamlit columns. Emits a
    Streamlit warning and returns early when there is nothing to plot.

    Args:
        df: Raw benchmark results.
        filters: Filter selections (see filter_dataframe).
    """
    if df.empty:
        st.warning("No data available for plotting.")
        return
    # Apply filters
    filtered_df = filter_dataframe(df, filters)
    if filtered_df.empty:
        st.warning("No data matches the selected filters for plotting.")
        return
    # Average the performance metrics per group; carry memory/device info
    # along (when the columns exist) so it can be shown on hover.
    agg_dict = {
        "Prompt Processing": "mean",
        "Token Generation": "mean",
    }
    if "Memory Usage (%)" in filtered_df.columns:
        agg_dict["Memory Usage (%)"] = "mean"
    if "Peak Memory (GB)" in filtered_df.columns:
        agg_dict["Peak Memory (GB)"] = "mean"
    if "CPU Cores" in filtered_df.columns:
        agg_dict["CPU Cores"] = "first"
    # Keep one PP/TG config value per group for the chart titles.
    agg_dict["PP Config"] = "first"
    agg_dict["TG Config"] = "first"
    plot_group = (
        filtered_df.groupby(["Device", "Platform"]).agg(agg_dict).reset_index()
    )
    # Scalar aggregations keep the original column names, so only the two
    # averaged metrics need display names.
    plot_group = plot_group.rename(
        columns={
            "Prompt Processing": "PP Avg (t/s)",
            "Token Generation": "TG Avg (t/s)",
        }
    )
    # Hover columns, limited to whatever actually survived aggregation.
    hover_data = [
        col
        for col in ("CPU Cores", "Peak Memory (GB)")
        if col in plot_group.columns
    ]
    # Create plots
    col1, col2 = st.columns(2)
    with col1:
        fig1 = create_performance_plot(
            plot_group,
            "PP Avg (t/s)",
            f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})",
            hover_data=hover_data,
        )
        if fig1:
            st.plotly_chart(fig1, use_container_width=True)
    with col2:
        fig2 = create_performance_plot(
            plot_group,
            "TG Avg (t/s)",
            f"Token Generation (TG: {plot_group['TG Config'].iloc[0]})",
            hover_data=hover_data,
        )
        if fig2:
            st.plotly_chart(fig2, use_container_width=True)
def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
    """Render the leaderboard table with grouped and formatted data.

    Applies the UI filters, groups rows by the user-selected columns,
    aggregates the metrics, and displays the result as a Streamlit
    dataframe. Warns and returns early when nothing matches.

    Args:
        df: Raw benchmark results.
        filters: Filter selections; must provide "grouping" and
            "visible_columns" lists (see filter_dataframe for the rest).
    """
    if df.empty:
        st.warning("No data available for the selected filters.")
        return
    # Apply filters
    filtered_df = filter_dataframe(df, filters)
    if filtered_df.empty:
        st.warning("No data matches the selected filters.")
        return
    # Define the preferred column order (grouped logically)
    column_order = [
        # Device Info
        "Device",
        "Platform",
        "CPU Cores",
        "Total Memory (GB)",
        "Peak Memory (GB)",
        # FIX: a missing trailing comma here used to fuse this entry with
        # "PP Config" into one bogus string via implicit concatenation.
        "Memory Usage (%)",
        # Benchmark Results
        "PP Config",
        "PP Avg (t/s)",
        "PP Std (t/s)",
        "TG Config",
        "TG Avg (t/s)",
        "TG Std (t/s)",
        # Model Config
        "Model ID",
        "Model Size",
        "n_threads",
        "flash_attn",
        "cache_type_k",
        "cache_type_v",
        "n_context",
        "n_batch",
        "n_ubatch",
        "Version",
    ]
    # Group by selected columns
    grouping_cols = filters["grouping"]
    if not grouping_cols:
        grouping_cols = ["Model ID", "Device", "Platform"]  # Default grouping
    # Create aggregations (excluding grouping columns)
    agg_dict = {
        col: agg
        for col, agg in {
            "Prompt Processing": ["mean", "std"],
            "Token Generation": ["mean", "std"],
            "Peak Memory (GB)": "mean",
            "Total Memory (GB)": "first",
            "CPU Cores": "first",
            "Model Size": "first",
            "Version": lambda x: ", ".join(sorted(set(x))),
            # FIX: str(x) stringified the whole Series and produced a set of
            # its *characters*; stringify each element instead.
            "n_gpu_layers": lambda x: ", ".join(sorted({str(v) for v in x})),
        }.items()
        if col not in grouping_cols
    }
    # Group and aggregate
    grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
    # Flatten the (column, aggregation) MultiIndex into single-level names,
    # e.g. ("Prompt Processing", "mean") -> "Prompt Processing (mean)".
    grouped_df.columns = [
        col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
    ]
    # Rename columns for display
    column_mapping = {
        "Prompt Processing (mean)": "PP Avg (t/s)",
        "Prompt Processing (std)": "PP Std (t/s)",
        "Token Generation (mean)": "TG Avg (t/s)",
        "Token Generation (std)": "TG Std (t/s)",
        "Memory Usage (%) (mean)": "Memory Usage (%)",
        "Peak Memory (GB) (mean)": "Peak Memory (GB)",
        "PP Config (first)": "PP Config",
        "TG Config (first)": "TG Config",
        "Model Size (first)": "Model Size",
        "CPU Cores (first)": "CPU Cores",
        "Total Memory (GB) (first)": "Total Memory (GB)",
        "n_threads (first)": "n_threads",
        "flash_attn (first)": "flash_attn",
        "cache_type_k (first)": "cache_type_k",
        "cache_type_v (first)": "cache_type_v",
        "n_context (first)": "n_context",
        "n_batch (first)": "n_batch",
        "n_ubatch (first)": "n_ubatch",
        "Version (<lambda>)": "Version",
        # FIX: the n_gpu_layers lambda column was never renamed for display.
        "n_gpu_layers (<lambda>)": "n_gpu_layers",
    }
    grouped_df = grouped_df.rename(columns=column_mapping)
    # Filter visible columns
    visible_cols = filters["visible_columns"]
    if visible_cols:
        # Map the user-friendly names to actual column names
        column_name_mapping = {
            "Device": "Device",
            "Platform": "Platform",
            "CPU Cores": "CPU Cores",
            "Total Memory (GB)": "Total Memory (GB)",
            "Peak Memory (GB)": "Peak Memory (GB)",
            "Memory Usage (%)": "Memory Usage (%)",
            "PP Config": "PP Config",
            "TG Config": "TG Config",
            "Prompt Processing (mean)": "PP Avg (t/s)",
            "Token Generation (mean)": "TG Avg (t/s)",
            "Prompt Processing (std)": "PP Std (t/s)",
            "Token Generation (std)": "TG Std (t/s)",
            "Model": "Model ID",
            "Model Size": "Model Size",
            "Model ID": "Model ID",
            "n_threads": "n_threads",
            "flash_attn": "flash_attn",
            "cache_type_k": "cache_type_k",
            "cache_type_v": "cache_type_v",
            "n_context": "n_context",
            "n_batch": "n_batch",
            "n_ubatch": "n_ubatch",
            "Version": "Version",
        }
        # Convert visible columns and grouping columns to their mapped names
        mapped_visible = {column_name_mapping.get(col, col) for col in visible_cols}
        mapped_grouping = {
            column_name_mapping.get(col, col) for col in filters["grouping"]
        }
        # Combine both sets to get unique columns
        all_cols = mapped_visible | mapped_grouping
        # Emit columns in the predefined order first, then any stragglers
        # alphabetically so nothing the user selected is dropped.
        display_cols = [col for col in column_order if col in all_cols]
        display_cols.extend(sorted(all_cols - set(display_cols)))
    else:
        # Default columns if none selected
        display_cols = column_order[:8]  # First 8 columns from the predefined order
    # Display the filtered and grouped table
    st.markdown("#### 📊 Benchmark Results")
    st.dataframe(
        grouped_df[display_cols],
        use_container_width=True,
        height=400,
    )