Spaces:
Running
Running
| #!/usr/bin/env python | |
| import os | |
# Pin the Gradio UI language. This assignment runs ahead of the
# `import gradio` further down the file.
# NOTE(review): presumably Gradio reads GRADIO_LANGUAGE at import/launch time - confirm.
os.environ["GRADIO_LANGUAGE"] = "en"

# Source of leaderboard results; used as the initial value of the dashboard's
# input textbox. The module refuses to load when it is missing or empty.
RESULT_DIR = os.getenv("MOECAP_RESULT_DIR")
if RESULT_DIR is None or RESULT_DIR == "":
    raise RuntimeError(
        "MOECAP_RESULT_DIR is not set. "
        "Please set MOECAP_RESULT_DIR before running app.py"
    )
| import json | |
| from typing import List, Tuple | |
| import gradio as gr | |
| import pandas as pd | |
| from datasets import load_dataset | |
def f2(x):
    """Round a number to two decimals; pass any other value through unchanged.

    ``int`` and ``float`` inputs (``bool`` included, since it subclasses
    ``int``) come back as a ``float`` rounded to 2 places. Everything else,
    such as ``None`` or a string, is returned exactly as given.
    """
    is_numeric = isinstance(x, (int, float))
    return round(float(x), 2) if is_numeric else x
def json_to_row(path: str, metrics: dict) -> dict:
    """Convert one metrics record into a leaderboard table row.

    The returned cells are HTML fragments: the callers in this file render
    the table with ``DataFrame.to_html(escape=False)`` so that the ``<br>``
    in the column headers and the model link survive. Because of that, every
    string value taken from ``metrics`` is HTML-escaped here, and the model
    name is URL-quoted before it is placed in the link target.

    Args:
        path: Identifier of where the record came from. Not used when
            building the row; kept so existing callers keep working.
        metrics: Mapping of metric names to values for a single run.

    Returns:
        A dict keyed by column header, in display order. Ratio metrics
        (accuracy, S-MBU, S-MFU) are converted to percentages rounded to two
        decimals; throughput and latency values are rounded via ``f2``.
    """
    import html
    from urllib.parse import quote

    def value_of(key, default=None):
        # A key stored with a null value is treated like a missing key so the
        # default applies either way.
        # NOTE(review): dataset-loaded records appear to carry explicit nulls
        # for fields absent from a given file - confirm.
        found = metrics.get(key)
        return default if found is None else found

    def escaped(cell):
        # Only strings can carry markup; numbers and None pass through.
        return html.escape(cell) if isinstance(cell, str) else cell

    def pct(x):
        return round(x * 100, 2) if isinstance(x, (int, float)) else None

    model_name = metrics.get("model_name") or "unknown-model"

    # Prefer an accuracy computed from raw counts; fall back to the
    # precomputed exact-match score when the counts are unusable.
    correct = metrics.get("correct")
    total = metrics.get("total")
    if isinstance(correct, (int, float)) and isinstance(total, (int, float)) and total > 0:
        acc = correct / total
    else:
        acc = metrics.get("exact_match")

    if isinstance(model_name, str) and "/" in model_name:
        # "org/name" identifiers link to the Hugging Face model page.
        # quote() percent-encodes quotes and angle brackets, so the URL
        # cannot break out of the single-quoted href attribute.
        hf_url = f"https://huggingface.co/{quote(model_name, safe='/')}"
        model_cell = f"<a href='{hf_url}' target='_blank'>{html.escape(model_name)}</a>"
    else:
        model_cell = escaped(model_name)

    cells = {
        "Dataset": value_of("dataset", "gsm8k"),
        "Method": value_of("method", ""),
        "Precision": value_of("precision", ""),
        "GSM8K<br>E2E(s)": f2(metrics.get("gsm8k_e2e_s")),
        "GSM8K<br>bs": metrics.get("gsm8k_bs"),
        "GSM8K<br>GPU": value_of("gpu_type", ""),
        "GSM8K<br>Accuracy(%)": pct(acc),
        "GSM8K<br>Decoding T/s": f2(metrics.get("decoding_throughput")),
        "GSM8K<br>Prefill T/s": f2(metrics.get("prefill_tp")),
        "GSM8K<br>Prefill<br>S-MBU(%)": pct(metrics.get("prefill_smbu")),
        "GSM8K<br>Prefill<br>S-MFU(%)": pct(metrics.get("prefill_smfu")),
        "GSM8K<br>Decoding<br>S-MBU(%)": pct(metrics.get("decoding_smbu")),
        "GSM8K<br>Decoding<br>S-MFU(%)": pct(metrics.get("decoding_smfu")),
        "TTFT(s)": f2(metrics.get("ttft")),
        "TPOT(s)": f2(metrics.get("tpot")),
    }

    # "Model" stays first so it is the leftmost column; it is already safe
    # HTML, while every other cell is escaped on the way in.
    row = {"Model": model_cell}
    row.update((header, escaped(cell)) for header, cell in cells.items())
    return row
| # upload | |
def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
    """Merge uploaded result files into the leaderboard and render it.

    Args:
        files: Uploaded result files. Each item may be an object exposing its
            on-disk path as ``.name`` or a plain path string. May be empty
            or ``None``.
        prev_rows: Rows returned by an earlier call. Newly parsed rows are
            appended after them. ``None`` means "no earlier rows".

    Returns:
        A ``(summary_md, table_html, rows)`` tuple: a Markdown summary with
        the row count and the distinct models, the table rendered as HTML
        with the ``metrics-table`` class, and the accumulated rows. When
        there are no rows at all, a placeholder message/HTML and an empty
        list are returned.
    """
    import logging

    if prev_rows is None:
        prev_rows = []

    log = logging.getLogger(__name__)
    new_rows = []
    for f in files or []:
        # Accept both file wrappers (path in `.name`) and plain path strings.
        path = getattr(f, "name", f)
        try:
            with open(path, "r", encoding="utf-8") as fp:
                metrics = json.load(fp)
            new_rows.append(json_to_row(path, metrics))
        except Exception:
            # Best effort: one unreadable or malformed file must not block the
            # rest of the upload, but leave a trace instead of hiding it.
            log.warning("Skipping unreadable result file %r", path, exc_info=True)
            continue

    all_rows = prev_rows + new_rows
    if not all_rows:
        return "No files uploaded.", "<p>No files loaded.</p>", []

    df = pd.DataFrame(all_rows)

    # Recover plain model names from the "Model" cells; linked cells hold an
    # <a ...>name</a> fragment, so take the text between the first '>' and
    # the following '<'.
    model_names = set()
    for cell in df["Model"].tolist():
        name = cell
        if isinstance(cell, str) and "href" in cell:
            try:
                name = cell.split(">", 1)[1].split("<", 1)[0]
            except IndexError:
                name = cell
        model_names.add(name)

    # key=str keeps the sort from raising when names are not all strings.
    links = [
        f"[{name}](https://huggingface.co/{name})"
        if isinstance(name, str) and "/" in name
        else str(name)
        for name in sorted(model_names, key=str)
    ]
    models_str = ", ".join(links)

    summary_md = f"**Loaded {len(all_rows)} result files.** \n**Models:** {models_str}"
    # escape=False keeps the <br> in headers and the model links intact.
    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
    return summary_md, table_html, all_rows
def load_from_dir(dir_path: str):
    """Load all JSON result records from a dataset location and render them.

    ``dir_path`` is interpolated into ``hf://datasets/{dir_path}/**/*.json``
    and handed to ``load_dataset`` with the ``json`` builder, so it names a
    location under ``hf://datasets/`` rather than a local directory.

    Args:
        dir_path: Location whose ``*.json`` files (at any depth) are loaded.

    Returns:
        A ``(summary_md, table_html)`` tuple. If loading fails, or no record
        is found, the first element describes the problem and the second is
        a short placeholder paragraph.
    """
    try:
        pattern = f"hf://datasets/{dir_path}/**/*.json"
        ds = load_dataset("json", data_files={"train": pattern}, split="train")
    except Exception as e:
        # UI boundary: report the failure in the page instead of raising.
        return f"Failed to load dataset `{dir_path}`: {e}", "<p>No files loaded.</p>"

    rows = []
    for i, example in enumerate(ds):
        if isinstance(example, dict):
            # Metrics may be nested under "metrics" or "json"; otherwise the
            # record itself is taken as the metrics mapping.
            metrics = example.get("metrics") or example.get("json") or example
        else:
            metrics = example
        rows.append(json_to_row(f"{dir_path}#{i}", metrics))

    if not rows:
        return f"No records found in dataset `{dir_path}`.", "<p>No records found.</p>"

    df = pd.DataFrame(rows)

    # Recover plain model names from the "Model" cells; linked cells hold an
    # <a ...>name</a> fragment, so take the text between the first '>' and
    # the following '<'.
    model_names = set()
    for cell in df["Model"].tolist():
        name = cell
        if isinstance(cell, str) and "href" in cell:
            try:
                name = cell.split(">", 1)[1].split("<", 1)[0]
            except IndexError:
                name = cell
        model_names.add(name)

    # key=str keeps the sort from raising when names are not all strings.
    links = [
        f"[{name}](https://huggingface.co/{name})"
        if isinstance(name, str) and "/" in name
        else str(name)
        for name in sorted(model_names, key=str)
    ]
    models_str = ", ".join(links)

    summary_md = (
        f"**Loaded {len(rows)} result files from dataset `{dir_path}`.** \n"
        f"**Models:** {models_str}"
    )
    # escape=False keeps the <br> in headers and the model links intact.
    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
    return summary_md, table_html
| # Gradio UI | |
def build_app() -> gr.Blocks:
    """Assemble the MoE-CAP dashboard UI.

    Layout: a title, then a row with two equal columns (a Markdown legend of
    tasks and table columns on the left; a textbox prefilled with
    ``RESULT_DIR`` plus a load button on the right), followed by the summary
    and the metrics table.

    Both the button click and a 5-second timer call ``load_from_dir`` with
    the textbox value and write its two results into the summary and table
    components.

    Returns:
        The constructed ``gr.Blocks`` app, not yet launched.
    """
    row_css = """
.gradio-container table.metrics-table th,
.gradio-container table.metrics-table td {
padding-top: 10px;
padding-bottom: 10px;
padding-left: 8px;
padding-right: 8px;
border: 1px solid #e5e7eb;
}
.gradio-container table.metrics-table {
border-collapse: collapse;
width: 100%;
}
"""
    with gr.Blocks(title="MoE-CAP Dashboard", css=row_css) as demo:
        gr.Markdown("# MoE-CAP Dashboard")
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(
                    "### Tasks\n"
                    "- Mathematics Problem-Solving Performance — "
                    "[**GSM8K**](https://arxiv.org/abs/2110.14168)\n\n"
                    "### Columns and Metrics\n"
                    "- Model \n"
                    "- Dataset \n"
                    "- Method \n"
                    "- Precision \n"
                    "- GSM8K E2E (s) \n"
                    "- GSM8K Batch Size \n"
                    "- GPU Type \n"
                    "- GSM8K Accuracy (%) \n"
                    "- Decoding Throughput (tokens/s) \n"
                    "- Prefill Throughput (tokens/s) \n"
                    "- Prefill S-MBU (%) \n"
                    "- Prefill S-MFU (%) \n"
                    "- Decoding S-MBU (%) \n"
                    "- Decoding S-MFU (%) \n"
                    "- TTFT (s) \n"
                    "- TPOT (s)"
                )
            with gr.Column(scale=1):
                # NOTE: the manual-upload flow (a multi-file `.json` picker
                # and a "Parse Uploaded Files" button wired to
                # build_leaderboard_from_files) is currently disabled; only
                # loading by location is exposed.
                dir_path = gr.Textbox(
                    label="Load from output directory",
                    value=RESULT_DIR,
                    lines=1,
                )
                load_dir_button = gr.Button("Load from directory")

        summary_output = gr.Markdown(label="Directory Summary")
        leaderboard_output = gr.HTML(label="Directory Metrics")

        load_dir_button.click(
            fn=load_from_dir,
            inputs=dir_path,
            outputs=[summary_output, leaderboard_output],
        )

        # Periodic refresh reuses the button's handler: same input and the
        # same two outputs.
        timer = gr.Timer(5.0)
        timer.tick(
            fn=load_from_dir,
            inputs=dir_path,
            outputs=[summary_output, leaderboard_output],
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the dashboard, then serve it on a fixed port.
    launch_kwargs = {"server_port": 7861}
    app = build_app()
    app.launch(**launch_kwargs)