|
import asyncio |
|
import streamlit as st |
|
import pandas as pd |
|
from typing import Optional, List, Set, Tuple, Dict, Any |
|
|
|
from .components.filters import render_table_filters |
|
from .components.visualizations import ( |
|
render_leaderboard_table, |
|
render_performance_plots, |
|
render_device_rankings, |
|
) |
|
from .components.header import render_header, render_contribution_guide |
|
from .components.rankings import render_algorithm_rankings |
|
from .components.device_comparison import render_device_comparison |
|
from .services.firebase import fetch_leaderboard_data |
|
from .core.styles import CUSTOM_CSS |
|
from .core.scoring import ( |
|
calculate_performance_score, |
|
get_performance_metrics, |
|
StandardBenchmarkConditions, |
|
) |
|
|
|
|
|
def get_filter_values(
    df: pd.DataFrame,
) -> tuple[
    List[str],
    List[str],
    List[str],
    List[str],
    List[str],
    Tuple[int, int],
    Tuple[int, int],
    Tuple[int, int],
    List[str],
    Tuple[int, int],
]:
    """Collect the option lists and numeric ranges offered by the table filters.

    Args:
        df: Leaderboard data. The columns read here are "Model ID",
            "Platform", "Device", "cache_type_v", "cache_type_k",
            "n_threads", "n_gpu_layers", "PP Config", "TG Config" and
            "Version".

    Returns:
        A 10-tuple, in this order:
        ``(models, platforms, devices, cache_type_v, cache_type_k,
        pp_range, tg_range, n_threads, versions, max_n_gpu_layers)``.
        The five categorical entries and ``versions`` are sorted lists of
        the distinct non-missing values; ``pp_range``, ``tg_range`` and
        ``n_threads`` are ``(min, max)`` pairs; ``max_n_gpu_layers`` is
        ``(0, max)`` with the lower bound pinned at 0.
    """

    def _sorted_unique(column: str) -> List[str]:
        # Missing values (NaN/None) cannot be ordered against strings, so
        # they are dropped before sorting instead of raising TypeError.
        return sorted(df[column].dropna().unique().tolist())

    def _min_max(column: str) -> Tuple[int, int]:
        return (df[column].min(), df[column].max())

    models = _sorted_unique("Model ID")
    platforms = _sorted_unique("Platform")
    devices = _sorted_unique("Device")
    cache_type_v = _sorted_unique("cache_type_v")
    cache_type_k = _sorted_unique("cache_type_k")
    versions = _sorted_unique("Version")

    n_threads = _min_max("n_threads")
    pp_range = _min_max("PP Config")
    tg_range = _min_max("TG Config")

    # ``default=0`` keeps an empty (or all-missing) column from raising
    # ValueError; the range then collapses to (0, 0).
    gpu_layer_values = df["n_gpu_layers"].dropna().unique().tolist()
    max_n_gpu_layers = (0, max(gpu_layer_values, default=0))

    return (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    )
|
|
|
|
|
def render_performance_metrics(metrics: Dict[str, Any]):
    """Show the headline performance figures as a row of five metric cards.

    Args:
        metrics: Mapping that provides the keys "top_device", "top_score",
            "avg_score", "total_devices" and "total_models". The two score
            entries are rendered with one decimal place.
    """
    st.markdown("### π Performance Overview")

    columns = st.columns(5)

    # (card label, key in ``metrics``, optional format spec) per column.
    card_specs = (
        ("Top Device", "top_device", None),
        ("Top Score", "top_score", ".1f"),
        ("Average Score", "avg_score", ".1f"),
        ("Total Devices", "total_devices", None),
        ("Total Models", "total_models", None),
    )

    for column, (label, key, spec) in zip(columns, card_specs):
        with column:
            # Values are looked up only when their card is drawn.
            value = metrics[key]
            st.metric(label, value if spec is None else format(value, spec))
|
|
|
|
|
async def main():
    """Main application entry point.

    Configures the page, injects the custom CSS, awaits the leaderboard
    data and, unless it is empty, renders the header, the three tabs
    (device rankings, benchmark results, device duel) and the contribution
    guide column.
    """
    st.set_page_config(
        page_title="AI Phone Benchmark Leaderboard",
        page_icon="π±",
        layout="wide",
    )
    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    df = await fetch_leaderboard_data()
    if df.empty:
        # Nothing to show: report the problem and stop rendering.
        st.error("No data available. Please check your connection and try again.")
        return

    df = calculate_performance_score(df)
    # NOTE(review): ``metrics`` is not consumed anywhere below; the call is
    # kept in case it is relied on elsewhere -- confirm before removing.
    metrics = get_performance_metrics(df)

    render_header()

    # Ten-element tuple, already in the positional order that
    # ``render_table_filters`` is called with further down.
    filter_options = get_filter_values(df)

    if "show_guide" not in st.session_state:
        st.session_state.show_guide = True

    # The guide column is wider while the guide is shown.
    if st.session_state.show_guide:
        column_widths = [0.8, 0.2]
    else:
        column_widths = [0.9, 0.1]
    main_col, guide_col = st.columns(column_widths)

    with main_col:
        tab_labels = ["Device Rankings", "Benchmark Results", "βοΈ Device Duel"]
        rankings_tab, results_tab, duel_tab = st.tabs(tab_labels)

        with rankings_tab:
            st.title("π Device Rankings")

            std = StandardBenchmarkConditions()
            ranking_note = (
                f"π Rankings are based on benchmarks with standard conditions: "
                f"PP={std.PP_CONFIG} tokens, TG={std.TG_CONFIG} tokens. "
                f"The rankings are based on the Glicko-2 algorithm."
            )
            st.info(ranking_note)

            render_device_rankings(df)

        with results_tab:
            table_filters = render_table_filters(*filter_options)

            render_leaderboard_table(df, table_filters)
            st.markdown("---")
            render_performance_plots(df, table_filters)

        with duel_tab:
            device_ids = df["Normalized Device ID"].unique().tolist()
            normalized_device_ids = sorted(device_ids)
            render_device_comparison(df, normalized_device_ids)

    with guide_col:
        render_contribution_guide()
|
|
|
|
|
# Script entry point: run the async ``main`` coroutine to completion when
# this module is executed directly rather than imported.
if __name__ == "__main__":
    asyncio.run(main())
|
|