import streamlit as st
import plotly.express as px
import pandas as pd
from typing import Optional, Dict, List, Set


def create_performance_plot(
    df: pd.DataFrame, metric: str, title: str, hover_data: Optional[List[str]] = None
):
    """Create a grouped bar chart comparing a performance metric across devices."""
    if df.empty:
        return None

    if hover_data is None:
        hover_data = ["CPU Cores", "Peak Memory (GB)"]

    fig = px.bar(
        df,
        x="Device",
        y=metric,
        color="Platform",
        title=title,
        template="plotly_white",
        barmode="group",
        hover_data=hover_data,
    )
    fig.update_layout(
        xaxis_title="Device",
        yaxis_title="Tokens/sec",
        legend_title="Platform",
        plot_bgcolor="white",
        height=400,
    )
    return fig


def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
    """Apply all filters to the dataframe."""
    if df.empty:
        return df

    filtered_df = df.copy()
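
    # Categorical filters: the sentinel value "All" means "do not filter".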
    if filters["model"] != "All":
        filtered_df = filtered_df[filtered_df["Model ID"] == filters["model"]]
    if filters["platform"] != "All":
        filtered_df = filtered_df[filtered_df["Platform"] == filters["platform"]]
    if filters["device"] != "All":
        filtered_df = filtered_df[filtered_df["Device"] == filters["device"]]

    if filters["flash_attn"] != "All":
        filtered_df = filtered_df[filtered_df["flash_attn"] == filters["flash_attn"]]

    if filters["cache_type_k"] != "All":
        filtered_df = filtered_df[
            filtered_df["cache_type_k"] == filters["cache_type_k"]
        ]

    if filters["cache_type_v"] != "All":
        filtered_df = filtered_df[
            filtered_df["cache_type_v"] == filters["cache_type_v"]
        ]

    # Range filters: each entry is a (min, max) tuple; None bounds disable the filter.
    pp_min, pp_max = filters["pp_range"]
    if pp_min is not None and pp_max is not None:
        pp_values = filtered_df["PP Config"]
        filtered_df = filtered_df[(pp_values >= pp_min) & (pp_values <= pp_max)]

    tg_min, tg_max = filters["tg_range"]
    if tg_min is not None and tg_max is not None:
        tg_values = filtered_df["TG Config"]
        filtered_df = filtered_df[(tg_values >= tg_min) & (tg_values <= tg_max)]

    n_threads_min, n_threads_max = filters["n_threads"]
    if n_threads_min is not None and n_threads_max is not None:
        n_threads = filtered_df["n_threads"]
        filtered_df = filtered_df[
            (n_threads >= n_threads_min) & (n_threads <= n_threads_max)
        ]

    n_gpu_layers_min, n_gpu_layers_max = filters["n_gpu_layers"]
    if n_gpu_layers_min is not None and n_gpu_layers_max is not None:
        n_gpu_layers = filtered_df["n_gpu_layers"]
        filtered_df = filtered_df[
            (n_gpu_layers >= n_gpu_layers_min) & (n_gpu_layers <= n_gpu_layers_max)
        ]

    if filters.get("Version") and filters["Version"] != "All":
        filtered_df = filtered_df[filtered_df["Version"] == filters["Version"]]

    return filtered_df
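
# Example `filters` dict accepted by filter_dataframe (illustrative values only;
# the widgets that actually build this dict live elsewhere in the app):
#
#     filters = {
#         "model": "All", "platform": "All", "device": "All",
#         "flash_attn": "All", "cache_type_k": "All", "cache_type_v": "All",
#         "pp_range": (None, None), "tg_range": (None, None),
#         "n_threads": (None, None), "n_gpu_layers": (None, None),
#         "Version": "All",
#     }
#     subset = filter_dataframe(df, filters)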


def render_performance_plots(df: pd.DataFrame, filters: Dict):
    """Render performance comparison plots."""
    if df.empty:
        st.warning("No data available for plotting.")
        return

    filtered_df = filter_dataframe(df, filters)
    if filtered_df.empty:
        st.warning("No data matches the selected filters for plotting.")
        return
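
    # Average the throughput metrics for each (Device, Platform) pair, and keep
    # the first value of the descriptive columns so they can appear in hover text.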
    agg_dict = {}

    agg_dict.update(
        {
            "Prompt Processing": "mean",
            "Token Generation": "mean",
        }
    )

    if "Memory Usage (%)" in filtered_df.columns:
        agg_dict["Memory Usage (%)"] = "mean"
    if "Peak Memory (GB)" in filtered_df.columns:
        agg_dict["Peak Memory (GB)"] = "mean"

    if "CPU Cores" in filtered_df.columns:
        agg_dict["CPU Cores"] = "first"

    agg_dict.update(
        {
            "PP Config": "first",
            "TG Config": "first",
        }
    )

    plot_group = filtered_df.groupby(["Device", "Platform"]).agg(agg_dict).reset_index()

    column_mapping = {
        "Prompt Processing": "PP Avg (t/s)",
        "Prompt Processing (count)": "Runs",
        "Token Generation": "TG Avg (t/s)",
        "Memory Usage (%) (mean)": "Memory Usage (%)",
        "Peak Memory (GB) (mean)": "Peak Memory (GB)",
        "PP Config (first)": "PP Config",
        "TG Config (first)": "TG Config",
        "Model Size (first)": "Model Size",
        "CPU Cores (first)": "CPU Cores",
        "Total Memory (GB) (first)": "Total Memory (GB)",
        "n_threads (first)": "n_threads",
        "flash_attn (first)": "flash_attn",
        "cache_type_k (first)": "cache_type_k",
        "cache_type_v (first)": "cache_type_v",
        "n_context (first)": "n_context",
        "n_batch (first)": "n_batch",
        "n_ubatch (first)": "n_ubatch",
    }
    plot_group = plot_group.rename(columns=column_mapping)

    hover_data = []
    if "CPU Cores" in plot_group.columns:
        hover_data.append("CPU Cores")
    if "Peak Memory (GB)" in plot_group.columns:
        hover_data.append("Peak Memory (GB)")

    col1, col2 = st.columns(2)
    with col1:
        fig1 = create_performance_plot(
            plot_group,
            "PP Avg (t/s)",
            f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})",
            hover_data=hover_data,
        )
        if fig1:
            st.plotly_chart(fig1, use_container_width=True)

    with col2:
        fig2 = create_performance_plot(
            plot_group,
            "TG Avg (t/s)",
            f"Token Generation (TG: {plot_group['TG Config'].iloc[0]})",
            hover_data=hover_data,
        )
        if fig2:
            st.plotly_chart(fig2, use_container_width=True)


def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
    """Render the leaderboard table with grouped and formatted data."""
    if df.empty:
        st.warning("No data available for the selected filters.")
        return

    filtered_df = filter_dataframe(df, filters)
    if filtered_df.empty:
        st.warning("No data matches the selected filters.")
        return
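
    # Besides the keys consumed by filter_dataframe, this view also reads
    # filters["grouping"] (columns to group by) and filters["visible_columns"]
    # (UI labels of the columns to display).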
    # Preferred left-to-right ordering of the table columns.
    column_order = [
        "Device",
        "Platform",
        "CPU Cores",
        "Total Memory (GB)",
        "Peak Memory (GB)",
        "Memory Usage (%)",
        "PP Config",
        "PP Avg (t/s)",
        "PP Std (t/s)",
        "TG Config",
        "TG Avg (t/s)",
        "TG Std (t/s)",
        "Model ID",
        "Model Size",
        "n_threads",
        "flash_attn",
        "cache_type_k",
        "cache_type_v",
        "n_context",
        "n_batch",
        "n_ubatch",
        "Version",
    ]

    grouping_cols = filters["grouping"]
    if not grouping_cols:
        grouping_cols = ["Model ID", "Device", "Platform"]

    # Aggregate every column that is not itself part of the grouping key.
    agg_dict = {
        col: agg
        for col, agg in {
            "Prompt Processing": ["mean", "std"],
            "Token Generation": ["mean", "std"],
            "Peak Memory (GB)": "mean",
            "Total Memory (GB)": "first",
            "CPU Cores": "first",
            "Model Size": "first",
            "Version": lambda x: ", ".join(sorted(set(x))),
            # Join the distinct values as a comma-separated string.
            "n_gpu_layers": lambda x: ", ".join(sorted({str(v) for v in x})),
        }.items()
        if col not in grouping_cols
    }

    grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
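
    # `.agg` with list/lambda aggregations yields MultiIndex columns; flatten them
    # to "<column> (<func>)", e.g. ("Prompt Processing", "mean") becomes
    # "Prompt Processing (mean)" and ("Version", "<lambda>") becomes
    # "Version (<lambda>)". Grouping keys keep their plain names.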
    grouped_df.columns = [
        col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
    ]

    column_mapping = {
        "Prompt Processing (mean)": "PP Avg (t/s)",
        "Prompt Processing (std)": "PP Std (t/s)",
        "Token Generation (mean)": "TG Avg (t/s)",
        "Token Generation (std)": "TG Std (t/s)",
        "Memory Usage (%) (mean)": "Memory Usage (%)",
        "Peak Memory (GB) (mean)": "Peak Memory (GB)",
        "PP Config (first)": "PP Config",
        "TG Config (first)": "TG Config",
        "Model Size (first)": "Model Size",
        "CPU Cores (first)": "CPU Cores",
        "Total Memory (GB) (first)": "Total Memory (GB)",
        "n_threads (first)": "n_threads",
        "flash_attn (first)": "flash_attn",
        "cache_type_k (first)": "cache_type_k",
        "cache_type_v (first)": "cache_type_v",
        "n_context (first)": "n_context",
        "n_batch (first)": "n_batch",
        "n_ubatch (first)": "n_ubatch",
        "Version (<lambda>)": "Version",
        "n_gpu_layers (<lambda>)": "n_gpu_layers",
    }
    grouped_df = grouped_df.rename(columns=column_mapping)

    visible_cols = filters["visible_columns"]
    if visible_cols:
        # Map UI column labels to the (renamed) dataframe columns.
        column_name_mapping = {
            "Device": "Device",
            "Platform": "Platform",
            "CPU Cores": "CPU Cores",
            "Total Memory (GB)": "Total Memory (GB)",
            "Peak Memory (GB)": "Peak Memory (GB)",
            "Memory Usage (%)": "Memory Usage (%)",
            "PP Config": "PP Config",
            "TG Config": "TG Config",
            "Prompt Processing (mean)": "PP Avg (t/s)",
            "Token Generation (mean)": "TG Avg (t/s)",
            "Prompt Processing (std)": "PP Std (t/s)",
            "Token Generation (std)": "TG Std (t/s)",
            "Model": "Model ID",
            "Model Size": "Model Size",
            "Model ID": "Model ID",
            "n_threads": "n_threads",
            "flash_attn": "flash_attn",
            "cache_type_k": "cache_type_k",
            "cache_type_v": "cache_type_v",
            "n_context": "n_context",
            "n_batch": "n_batch",
            "n_ubatch": "n_ubatch",
            "Version": "Version",
        }

        mapped_visible = {column_name_mapping.get(col, col) for col in visible_cols}
        mapped_grouping = {
            column_name_mapping.get(col, col) for col in filters["grouping"]
        }

        # Show the union of the user-selected columns and the grouping columns.
        available_cols = mapped_visible | mapped_grouping

        # Respect the preferred ordering first, then append anything left over.
        display_cols = [col for col in column_order if col in available_cols]
        remaining_cols = sorted(available_cols - set(display_cols))
        display_cols.extend(remaining_cols)
    else:
        # Default view: the first eight columns of the preferred order.
        display_cols = column_order[:8]

    # Drop any requested columns that are missing after grouping and renaming,
    # so the selection below cannot raise a KeyError.
    display_cols = [col for col in display_cols if col in grouped_df.columns]

    st.markdown("#### 📊 Benchmark Results")
    st.dataframe(
        grouped_df[display_cols],
        use_container_width=True,
        height=400,
    )
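

# Minimal wiring sketch (illustrative only): `load_benchmark_data` and
# `build_sidebar_filters` are hypothetical placeholders for however the rest of
# the app actually builds `df` and `filters`.
#
#     df = load_benchmark_data()           # -> pd.DataFrame of benchmark runs
#     filters = build_sidebar_filters(df)  # -> dict of filter selections
#     render_performance_plots(df, filters)
#     render_leaderboard_table(df, filters)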