import streamlit as st
import plotly.express as px
import pandas as pd
from typing import Optional, Dict, List, Set


def create_performance_plot(
    df: pd.DataFrame,
    metric: str,
    title: str,
    hover_data: Optional[List[str]] = None,
):
    """Create a grouped bar chart comparing *metric* across devices.

    Args:
        df: Aggregated benchmark rows; must contain "Device", "Platform"
            and the *metric* column.
        metric: Name of the numeric column plotted on the y-axis.
        title: Figure title.
        hover_data: Extra columns shown in the hover tooltip. Defaults to
            ["CPU Cores", "Peak Memory (GB)"].

    Returns:
        A plotly Figure, or None when *df* is empty.
    """
    if df.empty:
        return None

    if hover_data is None:
        hover_data = ["CPU Cores", "Peak Memory (GB)"]

    fig = px.bar(
        df,
        x="Device",
        y=metric,
        color="Platform",
        title=title,
        template="plotly_white",
        barmode="group",
        hover_data=hover_data,
    )
    # Both metrics rendered by this app are throughputs, hence the fixed
    # y-axis label regardless of *metric*.
    fig.update_layout(
        xaxis_title="Device",
        yaxis_title="Token/sec",
        legend_title="Platform",
        plot_bgcolor="white",
        height=400,
    )
    return fig


def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
    """Apply all filters to the dataframe.

    Expected ``filters`` keys:
      * exact-match (skipped when "All"): "model", "platform", "device",
        "flash_attn", "cache_type_k", "cache_type_v"
      * (min, max) ranges (skipped when either bound is None): "pp_range",
        "tg_range", "n_threads", "n_gpu_layers"
      * optional "Version" (skipped when missing, falsy, or "All")

    Returns a filtered copy; the input dataframe is never mutated.
    """
    if df.empty:
        return df

    filtered_df = df.copy()

    # Basic filters
    if filters["model"] != "All":
        filtered_df = filtered_df[filtered_df["Model ID"] == filters["model"]]
    if filters["platform"] != "All":
        filtered_df = filtered_df[filtered_df["Platform"] == filters["platform"]]
    if filters["device"] != "All":
        filtered_df = filtered_df[filtered_df["Device"] == filters["device"]]

    # Flash Attention filter
    if filters["flash_attn"] != "All":
        filtered_df = filtered_df[filtered_df["flash_attn"] == filters["flash_attn"]]

    # Cache Type filters
    if filters["cache_type_k"] != "All":
        filtered_df = filtered_df[
            filtered_df["cache_type_k"] == filters["cache_type_k"]
        ]
    if filters["cache_type_v"] != "All":
        filtered_df = filtered_df[
            filtered_df["cache_type_v"] == filters["cache_type_v"]
        ]

    # Range filters
    pp_min, pp_max = filters["pp_range"]
    if pp_min is not None and pp_max is not None:
        pp_values = filtered_df["PP Config"]
        filtered_df = filtered_df[(pp_values >= pp_min) & (pp_values <= pp_max)]

    tg_min, tg_max = filters["tg_range"]
    if tg_min is not None and tg_max is not None:
        tg_values = filtered_df["TG Config"]
        filtered_df = filtered_df[(tg_values >= tg_min) & (tg_values <= tg_max)]

    n_threads_min, n_threads_max = filters["n_threads"]
    if n_threads_min is not None and n_threads_max is not None:
        n_threads = filtered_df["n_threads"]
        filtered_df = filtered_df[
            (n_threads >= n_threads_min) & (n_threads <= n_threads_max)
        ]

    n_gpu_layers_min, n_gpu_layers_max = filters["n_gpu_layers"]
    if n_gpu_layers_min is not None and n_gpu_layers_max is not None:
        n_gpu_layers = filtered_df["n_gpu_layers"]
        filtered_df = filtered_df[
            (n_gpu_layers >= n_gpu_layers_min) & (n_gpu_layers <= n_gpu_layers_max)
        ]

    # Version filter (single selection; skipped when missing or "All")
    if filters.get("Version") != "All" and filters.get("Version"):
        filtered_df = filtered_df[filtered_df["Version"] == filters["Version"]]

    return filtered_df


def render_performance_plots(df: pd.DataFrame, filters: Dict):
    """Render side-by-side prompt-processing / token-generation bar charts.

    Filters the data, averages the throughput metrics per (Device, Platform)
    pair, and draws one chart per metric in two Streamlit columns.
    """
    if df.empty:
        st.warning("No data available for plotting.")
        return

    # Apply filters
    filtered_df = filter_dataframe(df, filters)
    if filtered_df.empty:
        st.warning("No data matches the selected filters for plotting.")
        return

    # Build aggregation dictionary based on available columns.
    # Performance metrics are always present; the rest are optional.
    agg_dict = {
        "Prompt Processing": "mean",
        "Token Generation": "mean",
    }

    # Include memory metrics if available
    if "Memory Usage (%)" in filtered_df.columns:
        agg_dict["Memory Usage (%)"] = "mean"
    if "Peak Memory (GB)" in filtered_df.columns:
        agg_dict["Peak Memory (GB)"] = "mean"

    # Include device info if available
    if "CPU Cores" in filtered_df.columns:
        agg_dict["CPU Cores"] = "first"

    # Include config values
    agg_dict["PP Config"] = "first"
    agg_dict["TG Config"] = "first"

    # Group by device and platform for plotting
    plot_group = (
        filtered_df.groupby(["Device", "Platform"]).agg(agg_dict).reset_index()
    )

    # Rename columns for display. pandas ignores mapping keys that are not
    # present, so stale keys are harmless.
    column_mapping = {
        "Prompt Processing": "PP Avg (t/s)",
        "Prompt Processing (count)": "Runs",
        "Token Generation": "TG Avg (t/s)",
        "Memory Usage (%) (mean)": "Memory Usage (%)",
        "Peak Memory (GB) (mean)": "Peak Memory (GB)",
        "PP Config (first)": "PP Config",
        "TG Config (first)": "TG Config",
        "Model Size (first)": "Model Size",
        "CPU Cores (first)": "CPU Cores",
        "Total Memory (GB) (first)": "Total Memory (GB)",
        "n_threads (first)": "n_threads",
        "flash_attn (first)": "flash_attn",
        "cache_type_k (first)": "cache_type_k",
        "cache_type_v (first)": "cache_type_v",
        "n_context (first)": "n_context",
        "n_batch (first)": "n_batch",
        "n_ubatch (first)": "n_ubatch",
    }
    plot_group = plot_group.rename(columns=column_mapping)

    # Define hover data based on available columns
    hover_data = []
    if "CPU Cores" in plot_group.columns:
        hover_data.append("CPU Cores")
    if "Peak Memory (GB)" in plot_group.columns:
        hover_data.append("Peak Memory (GB)")

    # Create plots
    col1, col2 = st.columns(2)
    with col1:
        fig1 = create_performance_plot(
            plot_group,
            "PP Avg (t/s)",
            f"Prompt Processing (PP: {plot_group['PP Config'].iloc[0]})",
            hover_data=hover_data,
        )
        if fig1:
            st.plotly_chart(fig1, use_container_width=True)

    with col2:
        fig2 = create_performance_plot(
            plot_group,
            "TG Avg (t/s)",
            f"Token Generation (TG: {plot_group['TG Config'].iloc[0]})",
            hover_data=hover_data,
        )
        if fig2:
            st.plotly_chart(fig2, use_container_width=True)


def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
    """Render the leaderboard table with grouped and formatted data."""
    if df.empty:
        st.warning("No data available for the selected filters.")
        return

    # Apply filters
    filtered_df = filter_dataframe(df, filters)
    if filtered_df.empty:
        st.warning("No data matches the selected filters.")
        return

    # Define the preferred column order (grouped logically)
    column_order = [
        # Device Info
        "Device",
        "Platform",
        "CPU Cores",
        "Total Memory (GB)",
        "Peak Memory (GB)",
        "Memory Usage (%)",  # fixed: missing comma here used to merge this
        # string with "PP Config" via implicit concatenation
        # Benchmark Results
        "PP Config",
        "PP Avg (t/s)",
        "PP Std (t/s)",
        "TG Config",
        "TG Avg (t/s)",
        "TG Std (t/s)",
        # Model Config
        "Model ID",
        "Model Size",
        "n_threads",
        "flash_attn",
        "cache_type_k",
        "cache_type_v",
        "n_context",
        "n_batch",
        "n_ubatch",
        "Version",
    ]

    # Group by selected columns
    grouping_cols = filters["grouping"]
    if not grouping_cols:
        grouping_cols = ["Model ID", "Device", "Platform"]  # Default grouping

    # Create aggregations, excluding grouping columns and any column missing
    # from the filtered data (agg would raise KeyError otherwise).
    candidate_aggs = {
        "Prompt Processing": ["mean", "std"],
        "Token Generation": ["mean", "std"],
        "Peak Memory (GB)": "mean",
        "Total Memory (GB)": "first",
        "CPU Cores": "first",
        "Model Size": "first",
        "Version": lambda x: ", ".join(sorted(set(x))),
        # fixed: join the distinct per-row values — the original joined the
        # characters of str(Series)
        "n_gpu_layers": lambda x: ", ".join(sorted({str(v) for v in x})),
    }
    agg_dict = {
        col: agg
        for col, agg in candidate_aggs.items()
        if col not in grouping_cols and col in filtered_df.columns
    }

    # Group and aggregate
    grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()

    # Flatten the MultiIndex column names produced by .agg
    grouped_df.columns = [
        col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
    ]

    # Rename columns for display. Lambda aggregators flatten to
    # "<name> (<lambda>)", hence those keys.
    column_mapping = {
        "Prompt Processing (mean)": "PP Avg (t/s)",
        "Prompt Processing (std)": "PP Std (t/s)",
        "Token Generation (mean)": "TG Avg (t/s)",
        "Token Generation (std)": "TG Std (t/s)",
        "Memory Usage (%) (mean)": "Memory Usage (%)",
        "Peak Memory (GB) (mean)": "Peak Memory (GB)",
        "PP Config (first)": "PP Config",
        "TG Config (first)": "TG Config",
        "Model Size (first)": "Model Size",
        "CPU Cores (first)": "CPU Cores",
        "Total Memory (GB) (first)": "Total Memory (GB)",
        "n_threads (first)": "n_threads",
        "flash_attn (first)": "flash_attn",
        "cache_type_k (first)": "cache_type_k",
        "cache_type_v (first)": "cache_type_v",
        "n_context (first)": "n_context",
        "n_batch (first)": "n_batch",
        "n_ubatch (first)": "n_ubatch",
        "Version (<lambda>)": "Version",
        "n_gpu_layers (<lambda>)": "n_gpu_layers",
        "Version ()": "Version",
    }
    grouped_df = grouped_df.rename(columns=column_mapping)

    # Filter visible columns
    visible_cols = filters["visible_columns"]
    if visible_cols:
        # Map the user-friendly names to actual column names
        column_name_mapping = {
            "Device": "Device",
            "Platform": "Platform",
            "CPU Cores": "CPU Cores",
            "Total Memory (GB)": "Total Memory (GB)",
            "Peak Memory (GB)": "Peak Memory (GB)",
            "Memory Usage (%)": "Memory Usage (%)",
            "PP Config": "PP Config",
            "TG Config": "TG Config",
            "Prompt Processing (mean)": "PP Avg (t/s)",
            "Token Generation (mean)": "TG Avg (t/s)",
            "Prompt Processing (std)": "PP Std (t/s)",
            "Token Generation (std)": "TG Std (t/s)",
            "Model": "Model ID",
            "Model Size": "Model Size",
            "Model ID": "Model ID",
            "n_threads": "n_threads",
            "flash_attn": "flash_attn",
            "cache_type_k": "cache_type_k",
            "cache_type_v": "cache_type_v",
            "n_context": "n_context",
            "n_batch": "n_batch",
            "n_ubatch": "n_ubatch",
            "Version": "Version",
        }

        # Convert visible columns and grouping columns to their mapped names
        mapped_visible = {column_name_mapping.get(col, col) for col in visible_cols}
        mapped_grouping = {
            column_name_mapping.get(col, col) for col in filters["grouping"]
        }

        # Combine both sets to get unique columns
        available_cols = mapped_visible | mapped_grouping

        # Emit columns in the predefined order first, then any leftovers.
        display_cols = [col for col in column_order if col in available_cols]
        display_cols.extend(sorted(available_cols - set(display_cols)))
    else:
        # Default columns if none selected
        display_cols = column_order[:8]  # First 8 columns from the predefined order

    # Drop requested columns that were never aggregated/present — indexing
    # with a missing label would raise KeyError.
    display_cols = [col for col in display_cols if col in grouped_df.columns]

    # Display the filtered and grouped table
    st.markdown("#### 📊 Benchmark Results")
    st.dataframe(
        grouped_df[display_cols],
        use_container_width=True,
        height=400,
    )