Spaces:
Running
Running
| #!/usr/bin/env python | |
| import os | |
| import json | |
| from typing import List, Tuple | |
# Set before gradio is imported further down.
# NOTE(review): presumably this pins the Gradio UI language to English —
# confirm that gradio reads GRADIO_LANGUAGE at import time.
os.environ["GRADIO_LANGUAGE"] = "en"

# Hugging Face dataset repo ID that holds the result JSON files; it is later
# interpolated into an ``hf://datasets/<repo>/**/*.json`` pattern. The setting
# is mandatory, so fail fast at import time when it is missing or empty.
RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
if not RESULT_DIR:
    raise RuntimeError(
        "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
    )
| import gradio as gr | |
| import pandas as pd | |
| from datasets import load_dataset | |
def f2(x):
    """Round a numeric value to two decimals; pass anything else through.

    ``int`` and ``float`` inputs (``bool`` included, being an ``int``
    subclass) are converted to ``float`` and rounded to 2 places. Every other
    value, such as ``None`` or a string, is returned unchanged.
    """
    return round(float(x), 2) if isinstance(x, (int, float)) else x
def json_to_row(path: str, metrics: dict) -> dict:
    """Convert one benchmark-result record into a leaderboard table row.

    Args:
        path: Identifier of where the record came from. Accepted for
            interface compatibility; it is not used when building the row.
        metrics: Parsed result record. Every key is optional; missing keys
            fall back to ``"Unknown"``, ``""`` or ``None`` as coded below.

    Returns:
        A dict keyed by display column name. ``"Model"`` is an HTML anchor to
        ``https://huggingface.co/<model_name>`` when the name contains a
        ``/``, otherwise the (escaped) name itself. Ratio metrics are
        converted to percentages and timing/throughput metrics are rounded to
        two decimals.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def pct(x):
        # Fraction -> percentage with 2 decimals; non-numeric values become None.
        return round(x * 100, 2) if isinstance(x, (int, float)) else None

    def text(value):
        # The callers in this file render rows with ``to_html(escape=False)``,
        # so any string taken from the record must be escaped here or it
        # would be injected into the page as live markup.
        return escape(value, quote=True) if isinstance(value, str) else value

    model_name = metrics.get("model_name") or "unknown-model"

    # Prefer an accuracy computed from raw counts; fall back to the
    # precomputed exact-match score when the counts are missing or unusable.
    correct = metrics.get("correct")
    total = metrics.get("total")
    if isinstance(correct, (int, float)) and isinstance(total, (int, float)) and total > 0:
        acc = correct / total
    else:
        acc = metrics.get("exact_match")

    if isinstance(model_name, str) and "/" in model_name:
        # Names shaped like "org/model" are linked to the Hugging Face hub.
        # The name is escaped for both the single-quoted attribute and the
        # link text; ordinary names produce exactly the same markup as before.
        safe_name = text(model_name)
        hf_url = f"https://huggingface.co/{safe_name}"
        model_cell = f"<a href='{hf_url}' target='_blank'>{safe_name}</a>"
    else:
        model_cell = text(model_name)

    return {
        "Model": model_cell,
        "Dataset": text(metrics.get("dataset", "Unknown")),
        "Method": text(metrics.get("method", "Unknown")),
        "Model type": text(metrics.get("model_type", "Unknown")),
        "Precision": text(metrics.get("precision", "Unknown")),
        "E2E(s)": f2(metrics.get("e2e_s", None)),
        "Batch size": text(metrics.get("batch_size", None)),
        "GPU": text(metrics.get("gpu_type", "")),
        "Accuracy(%)": pct(acc),
        "Cost($)": text(metrics.get("cost", None)),
        "Decoding T/s": f2(metrics.get("decoding_throughput")),
        "Prefill T/s": f2(metrics.get("prefill_tp")),
        "Prefill<br>S-MBU(%)": pct(metrics.get("prefill_smbu")),
        "Prefill<br>S-MFU(%)": pct(metrics.get("prefill_smfu")),
        "Decoding<br>S-MBU(%)": pct(metrics.get("decoding_smbu")),
        "Decoding<br>S-MFU(%)": pct(metrics.get("decoding_smfu")),
        "TTFT(s)": f2(metrics.get("ttft")),
        "TPOT(s)": f2(metrics.get("tpot")),
    }
def _model_links_markdown(cells: list) -> str:
    """Return a comma-separated Markdown list of the distinct models in *cells*.

    Each cell is a value of the "Model" column: either a plain name or an
    ``<a href=...>name</a>`` anchor, from which the visible name is extracted.
    Names containing ``/`` become Markdown links to the Hugging Face hub.
    """
    names = set()
    for cell in cells:
        name = cell
        if isinstance(cell, str) and "href" in cell:
            # Visible text sits between the first ">" and the following "<".
            _, found, rest = cell.partition(">")
            if found:
                name = rest.partition("<")[0]
        names.add(name)

    links = []
    # key=str keeps the sort well-defined if a non-string name slips in.
    for name in sorted(names, key=str):
        if isinstance(name, str) and "/" in name:
            links.append(f"[{name}](https://huggingface.co/{name})")
        else:
            links.append(str(name))
    return ", ".join(links)


def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
    """Merge uploaded result files into the leaderboard and render it.

    Args:
        files: Uploaded files; each item is either an object exposing the
            on-disk path as ``.name`` or the path itself. May be empty/None.
        prev_rows: Rows already loaded by earlier calls (list of row dicts).

    Returns:
        A ``(summary_markdown, table_html, rows)`` tuple, where ``rows`` is
        the previous rows followed by the rows parsed from ``files``. When
        there is nothing to show, placeholder texts and an empty list are
        returned.
    """
    if prev_rows is None:
        prev_rows = []

    new_rows = []
    for f in files or []:
        path = getattr(f, "name", f)
        try:
            with open(path, "r", encoding="utf-8") as fp:
                metrics = json.load(fp)
            new_rows.append(json_to_row(path, metrics))
        except (OSError, ValueError, AttributeError, TypeError) as exc:
            # Best effort: an unreadable, malformed or non-object JSON file
            # is skipped, but reported instead of vanishing silently.
            print(f"Skipping {path!r}: {exc}")

    all_rows = prev_rows + new_rows
    if not all_rows:
        return "No files uploaded.", "<p>No files loaded.</p>", []

    df = pd.DataFrame(all_rows)
    models_str = _model_links_markdown(df["Model"].tolist())
    # Two trailing spaces before the newline form a Markdown hard line break.
    summary_md = f"**Loaded {len(all_rows)} result files.**  \n**Models:** {models_str}"
    table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
    return summary_md, table_html, all_rows
def load_from_dir(
    dir_path: str,
    selected_tasks: List[str] | None = None,
    selected_frameworks: List[str] | None = None,
    selected_model_types: List[str] | None = None,
    selected_precisions: List[str] | None = None,
    search_keyword: str = "",
    force_refresh: bool = False,
):
    """Load result records from a Hugging Face dataset repo and render a table.

    Args:
        dir_path: Dataset repo ID; all ``*.json`` files under it are loaded.
        selected_tasks: Allowed "Dataset" values (case-insensitive), or
            ``None`` to skip this filter.
        selected_frameworks: Allowed "Method" values, or ``None``.
        selected_model_types: Allowed "Model type" values, or ``None``.
        selected_precisions: Allowed "Precision" values, or ``None``.
        search_keyword: Case-insensitive literal substring that must appear
            in at least one column of a row; blank disables the search.
        force_refresh: When true, ask ``load_dataset`` to re-download the
            files instead of reusing its cache.

    Returns:
        An HTML string: the rendered table, or a short placeholder paragraph
        when loading fails or no row survives the filters.
    """
    pattern = f"hf://datasets/{dir_path}/**/*.json"
    dl_mode = "force_redownload" if force_refresh else None
    print(f"Fetching from {pattern} (mode={dl_mode})...")
    try:
        ds = load_dataset(
            "json",
            data_files={"train": pattern},
            split="train",
            download_mode=dl_mode,
        )
    except Exception as e:
        # UI boundary: any load failure degrades to a placeholder, but the
        # cause is logged so it can be diagnosed.
        print(f"Failed to load {pattern}: {e}")
        return "<p>No files loaded or Dataset not found.</p>"

    rows = []
    for i, example in enumerate(ds):
        if isinstance(example, dict):
            # A record may wrap its payload under "metrics" or "json".
            metrics = example.get("metrics") or example.get("json") or example
        else:
            metrics = example
        if not isinstance(metrics, dict):
            print(f"Skipping record {dir_path}#{i}: not a JSON object")
            continue
        rows.append(json_to_row(f"{dir_path}#{i}", metrics))

    if not rows:
        return "<p>No records found.</p>"

    df = pd.DataFrame(rows)

    # Checkbox filters: keep rows whose column value (case-insensitive) is
    # among the selected values. None means "filter not applied".
    column_filters = (
        ("Dataset", selected_tasks),
        ("Method", selected_frameworks),
        ("Model type", selected_model_types),
        ("Precision", selected_precisions),
    )
    for column, selected in column_filters:
        if selected is not None:
            wanted = [str(x).lower() for x in selected]
            df = df[df[column].astype(str).str.lower().isin(wanted)]

    # Free-text search across all columns.
    keyword = search_keyword.strip().lower() if search_keyword else ""
    if keyword and not df.empty:
        searchable = df.astype(str)
        # Match against the visible model name only, not the anchor markup,
        # so that e.g. "href" or "blank" does not match every linked row.
        searchable["Model"] = searchable["Model"].str.replace(r"<[^>]*>", "", regex=True)
        # regex=False: the keyword is user input and must be taken literally;
        # as a pattern, text such as "(" would raise re.error.
        mask = searchable.apply(
            lambda col: col.str.lower().str.contains(keyword, regex=False)
        ).any(axis=1)
        df = df[mask]

    if df.empty:
        return "<p>No records found.</p>"

    df = df.fillna("-")
    return f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
def auto_refresh_from_dir(
    dir_path: str,
    selected_tasks: List[str] | None = None,
    selected_frameworks: List[str] | None = None,
    selected_model_types: List[str] | None = None,
    selected_precisions: List[str] | None = None,
    search_keyword: str = "",
):
    """Rebuild the table via ``load_from_dir`` with ``force_refresh=True``.

    Takes the same filter arguments as ``load_from_dir`` and forwards them
    unchanged; returns whatever ``load_from_dir`` returns.
    """
    active_filters = {
        "selected_tasks": selected_tasks,
        "selected_frameworks": selected_frameworks,
        "selected_model_types": selected_model_types,
        "selected_precisions": selected_precisions,
        "search_keyword": search_keyword,
    }
    return load_from_dir(dir_path, force_refresh=True, **active_filters)
# Gradio UI
def build_app() -> gr.Blocks:
    """Build the MoE-CAP dashboard.

    Layout: a left column (scale 2) with the search box, the four checkbox
    filters and an "About" accordion, and a right column (scale 5) holding
    the results table as raw HTML.

    Events: page load and a 60-second timer call ``auto_refresh_from_dir``;
    every change of the search box or a filter calls ``load_from_dir``. All
    handlers receive the same inputs and write to the results table.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    row_css = """
    /* ==============================================
    GLOBAL RESET & THEME
    ============================================== */
    body {
        background-color: #f5f7fa !important;
    }
    .gradio-container {
        max-width: 100% !important;
        padding: 20px !important;
        background-color: #f5f7fa !important;
        font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif;
    }
    /* Force all main text to be dark */
    .gradio-container,
    .gradio-container label,
    .gradio-container p,
    .gradio-container span,
    .gradio-container div {
        color: #24292e !important;
    }
    /* Explicitly force Headings to Black */
    .gradio-container h1,
    .gradio-container h2,
    .gradio-container h3 {
        color: #24292e !important;
        font-weight: 700;
    }
    /* Fix h1 spacing */
    .gradio-container h1 {
        margin-bottom: 24px;
    }
    /* Override default Gradio component backgrounds to White */
    .gradio-container .block,
    .gradio-container .form,
    .gradio-container fieldset,
    .gradio-container .gr-box,
    .gradio-container .gr-form,
    .gradio-container .panel {
        background-color: white !important;
        border-color: #e1e4e8 !important;
    }
    /* Remove transparent wrapper backgrounds */
    .gradio-container > div,
    .gradio-container .container,
    .gradio-container .wrap {
        background-color: transparent !important;
    }
    /* ==============================================
    SEARCH BOX (FIXED)
    ============================================== */
    .search-box {
        background: white !important;
        padding: 16px !important;
        border-radius: 8px;
        border: 2px solid #e1e4e8 !important;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
        margin-bottom: 16px;
    }
    /* Reset internal block styles so they don't overlap */
    .search-box .block,
    .search-box .form,
    .search-box .wrap,
    .search-box .container {
        background: transparent !important;
        border: none !important;
        padding: 0 !important;
        margin: 0 !important;
    }
    /* Style the Label */
    .search-box label span {
        color: #24292e !important;
        font-weight: 600;
        font-size: 14px;
        margin-bottom: 8px;
        display: block;
    }
    /* Style the Input Field itself */
    .search-box input {
        background-color: white !important;
        color: #24292e !important;
        border: 1.5px solid #d1d5da !important;
        border-radius: 6px !important;
        padding: 10px !important;
        box-shadow: none !important;
    }
    .search-box input:focus {
        border-color: #0366d6 !important;
        outline: 2px solid rgba(3, 102, 214, 0.3) !important;
    }
    /* ==============================================
    FILTERS SECTION
    ============================================== */
    .filter-section {
        background: white !important;
        padding: 0 !important;
        border-radius: 8px;
        border: 2px solid #e1e4e8 !important;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
    }
    .filter-section .wrap {
        padding: 20px !important;
        background: transparent !important;
    }
    /* Checkbox styling */
    .gradio-container input[type="checkbox"] {
        accent-color: #0366d6 !important;
        margin-right: 6px;
    }
    /* ==============================================
    ABOUT / INFO SECTION (ACCORDION)
    ============================================== */
    .gradio-container .accordion {
        background: white !important;
        border: 2px solid #e1e4e8 !important;
        border-radius: 8px !important;
        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
    }
    /* Internal Info Section Content */
    .info-section {
        padding: 16px;
        background: white !important;
    }
    /* Ensure all text inside About/Info is strictly black */
    .info-section h3 { margin-top: 0; margin-bottom: 10px; }
    .info-section ul { padding-left: 20px; margin-bottom: 15px; }
    .info-section li { margin-bottom: 4px; }
    .info-section,
    .info-section p,
    .info-section li,
    .info-section strong,
    .info-section span {
        color: #24292e !important;
    }
    .info-section a {
        color: #0366d6 !important;
        text-decoration: none;
    }
    .info-section a:hover { text-decoration: underline; }
    /* ==============================================
    DASHBOARD TABLE
    ============================================== */
    /* The Container - WIDER BORDER APPLIED HERE */
    .table-container {
        overflow-x: auto;
        overflow-y: auto;
        max-height: 75vh;
        /* Changed from 2px to 4px for a wider border */
        border: 4px solid #e1e4e8;
        border-radius: 8px;
        background: white;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
    }
    table.metrics-table {
        border-collapse: collapse;
        width: 100%;
        background: white;
        font-size: 13px;
    }
    table.metrics-table th,
    table.metrics-table td {
        padding: 12px 16px;
        border: 1px solid #e1e4e8;
        white-space: nowrap;
        text-align: left;
        color: #24292e !important;
    }
    table.metrics-table th {
        background: #f6f8fa;
        font-weight: 700;
        color: #24292e !important;
        position: sticky;
        top: 0;
        z-index: 10;
        border-bottom: 2px solid #d1d5da;
    }
    table.metrics-table tbody tr:nth-child(even) {
        background-color: #fafbfc;
    }
    table.metrics-table tbody tr:hover {
        background-color: #f1f8ff;
    }
    table.metrics-table a {
        color: #0366d6 !important;
        text-decoration: none;
        font-weight: 500;
    }
    table.metrics-table a:hover {
        text-decoration: underline;
    }
    """
    # NOTE(review): the emoji and em dashes in the labels/Markdown below were
    # restored from mis-encoded text; confirm they are the intended glyphs.
    # Use Gradio's default (light) theme explicitly
    with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
        gr.Markdown("# MoE-CAP Dashboard")
        with gr.Row():
            # Left side - Filters (narrower)
            with gr.Column(scale=2):
                with gr.Group(elem_classes="search-box"):
                    search_input = gr.Textbox(
                        label="🔍 Search",
                        placeholder="Search across all columns...",
                        lines=1,
                    )
                with gr.Group(elem_classes="filter-section"):
                    gr.Markdown("### 🎛️ Filters")
                    # Repo ID is carried as state so the handlers receive it
                    # as an ordinary input.
                    dir_path = gr.State(RESULT_DIR)
                    # 1) Tasks filter
                    task_filter = gr.CheckboxGroup(
                        label="📊 Tasks",
                        choices=[
                            ("GSM8K", "gsm8k"),
                            ("LongBench", "longbench"),
                            ("MMLU", "mmlu"),
                            ("NuminaMath", "numinamath"),
                            ("RULER", "ruler"),
                        ],
                        value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"],
                    )
                    # 2) Inference frameworks filter
                    framework_filter = gr.CheckboxGroup(
                        label="⚙️ Inference Frameworks",
                        choices=["sglang", "vllm"],
                        value=["sglang", "vllm"],
                    )
                    # 3) Model types filter
                    model_type_filter = gr.CheckboxGroup(
                        label="🤖 Model Types",
                        choices=["instruct", "thinking"],
                        value=["instruct", "thinking"],
                    )
                    # 4) Precision filter
                    precision_filter = gr.CheckboxGroup(
                        label="🎯 Precision",
                        choices=["bfloat16", "fp8"],
                        value=["bfloat16", "fp8"],
                    )
                with gr.Accordion("📖 About Tasks & Metrics", open=False):
                    gr.Markdown(
                        "### Tasks\n"
                        "- **GSM8K** — Mathematics Problem-Solving ([paper](https://arxiv.org/abs/2110.14168))\n"
                        "- **LongBench** — Long-Context Understanding ([paper](https://arxiv.org/abs/2412.15204))\n"
                        "- **MMLU** — Multitask Language Understanding ([paper](https://arxiv.org/abs/2009.03300))\n"
                        "- **NuminaMath** — Mathematical Reasoning ([paper](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf))\n"
                        "- **RULER** — Extreme Long-Context Eval ([paper](https://arxiv.org/abs/2404.06654))\n\n"
                        "### Metrics\n"
                        "- **E2E(s)** — End-to-End Latency\n"
                        "- **Accuracy(%)** — Task Accuracy\n"
                        "- **Cost($)** — Inference Cost\n"
                        "- **Decoding/Prefill T/s** — Throughput\n"
                        "- **S-MBU/MFU(%)** — Hardware Utilization\n"
                        "- **TTFT(s)** — Time To First Token\n"
                        "- **TPOT(s)** — Time Per Output Token",
                        elem_classes="info-section",
                    )
            # Right side - Table (wider)
            with gr.Column(scale=5):
                leaderboard_output = gr.HTML(label="📈 Results")

        # Every handler takes the same inputs and updates the same output.
        handler_inputs = [
            dir_path,
            task_filter,
            framework_filter,
            model_type_filter,
            precision_filter,
            search_input,
        ]
        handler_outputs = [leaderboard_output]

        # Initial page load forces a fresh download.
        demo.load(
            fn=auto_refresh_from_dir,
            inputs=handler_inputs,
            outputs=handler_outputs,
        )
        # Interactive changes re-filter without forcing a re-download.
        for control in (
            search_input,
            task_filter,
            framework_filter,
            model_type_filter,
            precision_filter,
        ):
            control.change(
                fn=load_from_dir,
                inputs=handler_inputs,
                outputs=handler_outputs,
            )
        # Periodic refresh every 60 seconds, also forcing a fresh download.
        timer = gr.Timer(60.0)
        timer.tick(
            fn=auto_refresh_from_dir,
            inputs=handler_inputs,
            outputs=handler_outputs,
        )
    return demo
# Script entry point: build the dashboard and launch it when the file is run
# directly (nothing is launched on import).
if __name__ == "__main__":
    app = build_app()
    app.launch()