from pathlib import Path

import altair as alt
import gradio as gr
import polars as pl

DATASETS: list[str] = []
BENCHMARKS = {
    # Name: (device, AMP, compile, single thread)
    "Parameters": (None, None, None, None),
    "GPU Memory": (None, None, None, None),
    "CPU rate": ("cpu", False, False, False),
    "CPU rate single core": ("cpu", False, False, True),
    "CPU rate with compile": ("cpu", False, True, False),
    "CPU rate AMP with compile": ("cpu", True, True, False),
    "CUDA rate": ("cuda", False, False, False),
    "CUDA rate with compile": ("cuda", False, True, False),
    "CUDA rate AMP with compile": ("cuda", True, True, False),
}
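# Note: for the rate benchmarks, each tuple is matched against the "device", "amp",
# "compile" and "single_thread" columns of the per-dataset results CSV inside
# update_data(). The all-None entries ("Parameters", "GPU Memory") are handled by
# dedicated branches and never reach that filter.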
def plot_acc_param(param_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    """Scatter plot of accuracy vs. parameter count, with model name labels and a Pareto frontier step line"""

    df = param_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "Parameters (M)",
        "Pareto frontier (p)",
        "Intermediate",
        "MIM",
        "Distilled",
    )
    base = df.plot.point(
        x="Parameters (M)",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
            "Intermediate",
            "MIM",
            "Distilled",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="Parameters (M)", y="Pareto frontier (p)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier

    return chart.properties(title="Accuracy vs Parameter Count", width=width, height=height).configure_scale(zero=False)
def plot_acc_memory(memory_compare_results_df: pl.DataFrame, width: int = 900, height: int = 640) -> alt.LayerChart:
    """Scatter plot of accuracy vs. peak GPU memory, with model name labels and a Pareto frontier step line"""

    if len(memory_compare_results_df) > 0:
        batch_size = memory_compare_results_df["max_batch_size"][0]
        amp = memory_compare_results_df["amp"][0]
    else:
        batch_size = ""
        amp = ""

    df = memory_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "Peak GPU memory (MB)",
        "Parameters (M)",
        "Pareto frontier (mem)",
        "Intermediate",
        "MIM",
        "Distilled",
    )
    base = df.plot.point(
        x="Peak GPU memory (MB)",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "Peak GPU memory (MB)",
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
            "Intermediate",
            "MIM",
            "Distilled",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="Peak GPU memory (MB)", y="Pareto frontier (mem)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier

    return chart.properties(
        title=f"Accuracy vs GPU Memory (batch size={batch_size}, amp={amp})", width=width, height=height
    ).configure_scale(zero=False)
def plot_acc_rate(rate_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    """Scatter plot of accuracy vs. inference time per sample, with model name labels and a Pareto frontier step line"""

    if len(rate_compare_results_df) > 0:
        device = rate_compare_results_df["device"][0]
        compiled = rate_compare_results_df["compile"][0]
        batch_size = rate_compare_results_df["max_batch_size"][0]
        amp = rate_compare_results_df["amp"][0]
        single_thread = rate_compare_results_df["single_thread"][0]
    else:
        device = ""
        compiled = ""
        batch_size = ""
        amp = ""
        single_thread = False

    df = rate_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "ms / sample",
        "Parameters (M)",
        "Pareto frontier (ms)",
        "Intermediate",
        "MIM",
        "Distilled",
    )
    base = df.plot.point(
        x="ms / sample",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "ms / sample",
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
            "Intermediate",
            "MIM",
            "Distilled",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="ms / sample", y="Pareto frontier (ms)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier

    if single_thread is True:
        single_thread_title = " Single Core"
    else:
        single_thread_title = ""

    return chart.properties(
        title=(
            f"Accuracy vs {device.upper()}{single_thread_title} Rate (compile={compiled}, "
            f"batch size={batch_size}, amp={amp})"
        ),
        width=width,
        height=height,
    ).configure_scale(zero=False)
def update_data(
    dataset: str, benchmark: str, intermediate: bool, mim: bool, dist: bool, log_x: bool, search_bar: str
) -> tuple[alt.LayerChart, pl.DataFrame]:
    """Load the selected dataset results and build the chart and table for the chosen benchmark and filters"""

    compare_results_df = pl.read_csv(f"results_{dataset}.csv")
    if intermediate is False:
        compare_results_df = compare_results_df.filter(pl.col("Intermediate") == intermediate)
    if mim is False:
        compare_results_df = compare_results_df.filter(pl.col("MIM") == mim)
    if dist is False:
        compare_results_df = compare_results_df.filter(pl.col("Distilled") == dist)

    x_scale_type = "log" if log_x is True else "linear"

    # Filter models by name (the search box is treated as a regular expression)
    compare_results_df = compare_results_df.filter(pl.col("Model name").str.contains(search_bar))

    # Parameter count
    if benchmark == "Parameters":
        param_compare_results_df = compare_results_df.unique(subset=["Model name", "Resolution"]).sort(
            "Parameters (M)", descending=False
        )
        # Rows are sorted by parameter count, so the running maximum of accuracy traces the Pareto frontier
        param_compare_results_df = param_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (p)")
        )
        param_compare_results_df = param_compare_results_df.drop(
            "Samples / sec", "device", "ms / sample", "Peak GPU memory (MB)"
        )
        chart = plot_acc_param(param_compare_results_df)
        x_max = param_compare_results_df["Parameters (M)"].quantile(0.9)
        x_min = param_compare_results_df["Parameters (M)"].quantile(0.1)
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = param_compare_results_df

    # Peak memory
    elif benchmark == "GPU Memory":
        memory_compare_results_df = compare_results_df.drop_nulls(subset=["Peak GPU memory (MB)"])
        memory_compare_results_df = memory_compare_results_df.unique(subset=["Model name", "Resolution"]).sort(
            "Peak GPU memory (MB)", descending=False
        )
        memory_compare_results_df = memory_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (mem)")
        )
        memory_compare_results_df = memory_compare_results_df.drop("Samples / sec", "device", "ms / sample")
        chart = plot_acc_memory(memory_compare_results_df)
        x_max = memory_compare_results_df["Peak GPU memory (MB)"].quantile(0.9)
        x_min = memory_compare_results_df["Peak GPU memory (MB)"].quantile(0.1)
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = memory_compare_results_df

    # Rate
    else:
        (device, amp_enabled, compiled, single_thread) = BENCHMARKS[benchmark]
        df = compare_results_df.drop_nulls(subset=["ms / sample"])
        df = df.filter(device=device, amp=amp_enabled, compile=compiled, single_thread=single_thread)
        device_compare_results_df = df.unique(subset=["Model name", "Resolution"]).sort("ms / sample", descending=False)
        device_compare_results_df = device_compare_results_df.drop("Peak GPU memory (MB)")
        device_compare_results_df = device_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (ms)")
        )
        chart = plot_acc_rate(device_compare_results_df)
        x_max = device_compare_results_df["ms / sample"].quantile(0.95)
        x_min = device_compare_results_df["ms / sample"].min()
        if x_max is not None and x_min is not None:
            x_max = x_max * 1.04
            x_min = x_min * 0.96

        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = device_compare_results_df

    # Round float columns for display
    output_df = output_df.select(
        [
            pl.col(col).round(4) if output_df.schema[col] in [pl.Float32, pl.Float64] else col
            for col in output_df.columns
        ]
    )

    return (chart, output_df.drop("Mistakes", "Samples", "torch_version"))
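# Illustrative only (not part of the original source): with a results_il-common.csv present,
# the view the UI requests on load corresponds roughly to
#   chart, table_df = update_data("il-common", "Parameters", True, True, True, False, "")
# where the empty search string matches every model name.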
def app() -> None:
    """Define and launch the Gradio leaderboard UI"""

    with gr.Blocks(title="Birder Leaderboard", analytics_enabled=False) as leaderboard:
        gr.HTML("<center><h1>The Birder Leaderboard</h1></center>")
        with gr.Row():
            with gr.Column():
                pass
            with gr.Column():
                gr.Markdown(
                    """
                    Leaderboard of all the pre-trained Birder models across multiple datasets.

                    ### Benchmark Setup

                    * GPU: A5000 ADA Generation
                    * CPU: AMD Ryzen Threadripper PRO 7975WX
                    * PyTorch version: 2.7.1+cu128

                    ### Dataset Information

                    | Name              | Training samples | Validation samples | Classes |
                    |-------------------|------------------|--------------------|---------|
                    | arabian-peninsula | 583,868          | 21,634             | 735     |
                    | eu-common         | 569,784          | 19,869             | 707     |
                    | il-all            | 462,430          | 18,621             | 550     |
                    | il-common         | 330,880          | 15,828             | 371     |
                    """
                )
            with gr.Column():
                pass

        with gr.Row():
            with gr.Column():
                pass
            with gr.Column():
                dataset_dropdown = gr.Dropdown(
                    choices=DATASETS,
                    label="Select Dataset",
                    value=DATASETS[0] if DATASETS else None,
                )
                benchmark_dropdown = gr.Dropdown(
                    choices=list(BENCHMARKS.keys()),
                    label="Select Benchmark",
                    value=next(iter(BENCHMARKS.keys())) if BENCHMARKS else None,
                    filterable=False,
                )
            with gr.Column():
                intermediate = gr.Checkbox(
                    label="Intermediate",
                    value=True,
                    info="Show models that underwent intermediate training (extra data)",
                )
                mim = gr.Checkbox(label="MIM", value=True, info="Show models with Masked Image Modeling pre-training")
                dist = gr.Checkbox(label="Distilled", value=True, info="Show distilled models")
                log_x = gr.Checkbox(label="Log scale X-axis", value=False)
            with gr.Column():
                pass

        with gr.Row():
            with gr.Column():
                pass
            with gr.Column(scale=2):
                search_bar = gr.Textbox(label="Model Filter", placeholder="e.g. convnext, efficient|mobile")
            with gr.Column():
                pass

        plot = gr.Plot(container=False)
        table = gr.Dataframe(show_search="search")

        inputs = [dataset_dropdown, benchmark_dropdown, intermediate, mim, dist, log_x, search_bar]
        outputs = [plot, table]

        # Refresh the chart and table on load and whenever any control changes
        leaderboard.load(update_data, inputs=inputs, outputs=outputs)
        dataset_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        benchmark_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        intermediate.change(update_data, inputs=inputs, outputs=outputs)
        mim.change(update_data, inputs=inputs, outputs=outputs)
        dist.change(update_data, inputs=inputs, outputs=outputs)
        log_x.change(update_data, inputs=inputs, outputs=outputs)
        search_bar.change(update_data, inputs=inputs, outputs=outputs)

    leaderboard.launch()
# Launch the app
if __name__ == "__main__":
    file_info = []
    for p in Path(".").glob("results_*.csv"):
        file_info.append((p.stat().st_size, p.stem.removeprefix("results_")))

    DATASETS = [dataset_name for _, dataset_name in sorted(file_info, key=lambda x: x[1], reverse=True)]
    app()
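# Running locally (assumption, not part of the original source): the startup block above scans
# the working directory for results_<dataset>.csv files, so place at least one such file next
# to this script and, assuming it is saved as app.py, start it with:
#   python app.py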