jing084's picture
Update app.py
e2114e9 verified
raw
history blame
9.53 kB
#!/usr/bin/env python
import os
# Pin the Gradio UI language to English. NOTE(review): this is set ahead of
# the `import gradio` further down, presumably on purpose -- confirm before
# reordering the imports.
os.environ["GRADIO_LANGUAGE"] = "en"

# Location of the results to display; it pre-fills the textbox in the UI.
# An unset or empty variable aborts start-up with an explicit error.
RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
if not RESULT_DIR:
    raise RuntimeError(
        "MOECAP_RESULT_DIR is not set. "
        "Please set MOECAP_RESULT_DIR before running app.py"
    )
import json
from typing import List, Tuple
import gradio as gr
import pandas as pd
from datasets import load_dataset
def f2(x):
    """Round a numeric value to two decimal places.

    Ints and floats are returned as a ``float`` rounded to 2 places. Any
    other value (``None``, strings, ...) is returned unchanged.

    Booleans are returned unchanged as well: ``bool`` is a subclass of
    ``int``, so without the explicit check ``True`` would be reported as
    ``1.0`` and ``False`` as ``0.0``.
    """
    if isinstance(x, bool) or not isinstance(x, (int, float)):
        return x
    return round(float(x), 2)
def json_to_row(path: str, metrics: dict) -> dict:
    """Convert one parsed metrics record into a leaderboard table row.

    Args:
        path: Identifier of the record's origin. Kept for interface
            compatibility; it is not used to build the row.
        metrics: Parsed metrics mapping. A key that is missing and a key
            that is present with value ``None`` are treated the same way and
            fall back to the defaults used below.

    Returns:
        A dict keyed by the table's column headers. The ``Model`` cell is an
        HTML link to the Hugging Face page when the model name contains a
        ``/``; every other string cell is HTML-escaped.
    """
    import html  # local import so this block does not depend on file-level imports

    def text(key, default=""):
        # dict.get(key, default) only applies the default when the key is
        # absent; an explicit None would otherwise show up as "None".
        value = metrics.get(key)
        return default if value is None else value

    def pct(x):
        return round(x * 100, 2) if isinstance(x, (int, float)) else None

    model_name = metrics.get("model_name") or "unknown-model"

    # Accuracy: prefer correct/total when both are usable, else exact_match.
    em = metrics.get("exact_match")
    correct = metrics.get("correct")
    total = metrics.get("total")
    if isinstance(correct, (int, float)) and isinstance(total, (int, float)) and total > 0:
        acc = correct / total
    else:
        acc = em

    # The table is rendered with escape=False, so text coming from the
    # metrics record has to be escaped here before it is embedded in HTML.
    if isinstance(model_name, str):
        safe_name = html.escape(model_name, quote=True)
        if "/" in model_name:
            hf_url = f"https://huggingface.co/{safe_name}"
            model_cell = f"<a href='{hf_url}' target='_blank'>{safe_name}</a>"
        else:
            model_cell = safe_name
    else:
        model_cell = model_name

    row = {
        "Model": model_cell,
        "Dataset": text("dataset", "gsm8k"),
        "Method": text("method"),
        "Precision": text("precision"),
        "GSM8K<br>E2E(s)": f2(metrics.get("gsm8k_e2e_s")),
        "GSM8K<br>bs": metrics.get("gsm8k_bs"),
        "GSM8K<br>GPU": text("gpu_type"),
        "GSM8K<br>Accuracy(%)": pct(acc),
        "GSM8K<br>Decoding T/s": f2(metrics.get("decoding_throughput")),
        "GSM8K<br>Prefill T/s": f2(metrics.get("prefill_tp")),
        "GSM8K<br>Prefill<br>S-MBU(%)": pct(metrics.get("prefill_smbu")),
        "GSM8K<br>Prefill<br>S-MFU(%)": pct(metrics.get("prefill_smfu")),
        "GSM8K<br>Decoding<br>S-MBU(%)": pct(metrics.get("decoding_smbu")),
        "GSM8K<br>Decoding<br>S-MFU(%)": pct(metrics.get("decoding_smfu")),
        "TTFT(s)": f2(metrics.get("ttft")),
        "TPOT(s)": f2(metrics.get("tpot")),
    }
    # "Model" already holds finished (escaped) HTML; escape all other strings.
    return {
        column: html.escape(value) if column != "Model" and isinstance(value, str) else value
        for column, value in row.items()
    }
# Upload handling
def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
if prev_rows is None:
prev_rows = []
if not files and prev_rows:
df = pd.DataFrame(prev_rows)
raw_models = set()
for cell in df["Model"].tolist():
if isinstance(cell, str) and "href" in cell:
try:
name = cell.split(">", 1)[1].split("<", 1)[0]
except Exception:
name = cell
else:
name = cell
raw_models.add(name)
links = []
for name in sorted(raw_models):
if isinstance(name, str) and "/" in name:
hf_url = f"https://huggingface.co/{name}"
links.append(f"[{name}]({hf_url})")
else:
links.append(str(name))
models_str = ", ".join(links)
summary_md = f"**Loaded {len(prev_rows)} result files.** \n**Models:** {models_str}"
table_html = df.to_html(escape=False, index=False, classes="metrics-table")
return summary_md, table_html, prev_rows
new_rows = []
if files:
for f in files:
path = f.name
try:
with open(path, "r", encoding="utf-8") as fp:
metrics = json.load(fp)
new_rows.append(json_to_row(path, metrics))
except Exception:
continue
all_rows = prev_rows + new_rows
if not all_rows:
empty_html = "<p>No files loaded.</p>"
return "No files uploaded.", empty_html, []
df = pd.DataFrame(all_rows)
raw_models = set()
for cell in df["Model"].tolist():
if isinstance(cell, str) and "href" in cell:
try:
name = cell.split(">", 1)[1].split("<", 1)[0]
except Exception:
name = cell
else:
name = cell
raw_models.add(name)
links = []
for name in sorted(raw_models):
if isinstance(name, str) and "/" in name:
hf_url = f"https://huggingface.co/{name}"
links.append(f"[{name}]({hf_url})")
else:
links.append(str(name))
models_str = ", ".join(links)
summary_md = f"**Loaded {len(all_rows)} result files.** \n**Models:** {models_str}"
table_html = df.to_html(escape=False, index=False, classes="metrics-table")
return summary_md, table_html, all_rows
def load_from_dir(dir_path: str):
    """Load all JSON records of a dataset location and render them.

    Args:
        dir_path: Value interpolated into the pattern
            ``hf://datasets/{dir_path}/**/*.json`` that is handed to
            ``load_dataset``. Surrounding whitespace is ignored.

    Returns:
        A ``(summary_markdown, table_html)`` tuple. Loading failures and
        empty results are reported through the summary text rather than by
        raising.
    """

    def display_name(cell):
        # Recover the plain model name from an "<a ...>name</a>" cell.
        if isinstance(cell, str) and "href" in cell:
            _, found, rest = cell.partition(">")
            if found:
                return rest.partition("<")[0]
        return cell

    dir_path = (dir_path or "").strip()
    if not dir_path:
        return "No dataset specified.", "<p>No files loaded.</p>"

    try:
        pattern = f"hf://datasets/{dir_path}/**/*.json"
        ds = load_dataset("json", data_files={"train": pattern}, split="train")
    except Exception as e:
        # UI boundary: surface any loading problem to the user as text.
        return f"Failed to load dataset `{dir_path}`: {e}", "<p>No files loaded.</p>"

    rows = []
    skipped = 0
    for i, example in enumerate(ds):
        metrics = example
        if isinstance(example, dict):
            # Metrics may be nested under "metrics" or "json", else top level.
            metrics = example.get("metrics") or example.get("json") or example
        if not isinstance(metrics, dict):
            # One malformed record must not take down the whole refresh.
            skipped += 1
            continue
        rows.append(json_to_row(f"{dir_path}#{i}", metrics))

    skipped_note = (
        f"  \n**Skipped {skipped} record(s) that could not be parsed.**" if skipped else ""
    )

    if not rows:
        return (
            f"No records found in dataset `{dir_path}`." + skipped_note,
            "<p>No records found.</p>",
        )

    df = pd.DataFrame(rows)
    model_names = {display_name(cell) for cell in df["Model"].tolist()}

    links = []
    # key=str keeps sorting well-defined when names are not all strings.
    for name in sorted(model_names, key=str):
        if isinstance(name, str) and "/" in name:
            links.append(f"[{name}](https://huggingface.co/{name})")
        else:
            links.append(str(name))
    models_str = ", ".join(links)

    # Two trailing spaces before the newline force a Markdown hard line break.
    summary_md = (
        f"**Loaded {len(rows)} result files from dataset `{dir_path}`.**  \n"
        f"**Models:** {models_str}"
    ) + skipped_note
    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
    return summary_md, table_html
# Gradio UI
def build_app() -> gr.Blocks:
    """Assemble the MoE-CAP dashboard.

    The page has a static description column and a control column holding a
    textbox (pre-filled with ``RESULT_DIR``) and a load button. Both the
    button click and a 5-second timer call ``load_from_dir`` with the textbox
    value and write its result to the summary and table components.

    Returns:
        The constructed ``gr.Blocks`` app; launching it is left to the caller.
    """
    row_css = """
    .gradio-container table.metrics-table th,
    .gradio-container table.metrics-table td {
        padding-top: 10px;
        padding-bottom: 10px;
        padding-left: 8px;
        padding-right: 8px;
        border: 1px solid #e5e7eb;
    }
    .gradio-container table.metrics-table {
        border-collapse: collapse;
        width: 100%;
    }
    """

    with gr.Blocks(title="MoE-CAP Dashboard", css=row_css) as demo:
        gr.Markdown("# MoE-CAP Dashboard")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(
                    "### Tasks\n"
                    "- Mathematics Problem-Solving Performance — "
                    # arXiv identifiers use a dot, not a hyphen.
                    "[**GSM8K**](https://arxiv.org/abs/2110.14168)\n\n"
                    "### Columns and Metrics\n"
                    "- Model \n"
                    "- Dataset \n"
                    "- Method \n"
                    "- Precision \n"
                    "- GSM8K E2E (s) \n"
                    "- GSM8K Batch Size \n"
                    "- GPU Type \n"
                    "- GSM8K Accuracy (%) \n"
                    "- Decoding Throughput (tokens/s) \n"
                    "- Prefill Throughput (tokens/s) \n"
                    "- Prefill S-MBU (%) \n"
                    "- Prefill S-MFU (%) \n"
                    "- Decoding S-MBU (%) \n"
                    "- Decoding S-MFU (%) \n"
                    "- TTFT (s) \n"
                    "- TPOT (s)"
                )

            with gr.Column(scale=1):
                # NOTE: the manual upload path (build_leaderboard_from_files)
                # is currently not wired into the UI. That function returns
                # (summary, table, rows), so re-enabling it needs a third
                # output to carry the accumulated rows.
                dir_path = gr.Textbox(
                    label="Load from output directory",
                    value=RESULT_DIR,
                    lines=1,
                )
                load_dir_button = gr.Button("Load from directory")

        summary_output = gr.Markdown(label="Directory Summary")
        leaderboard_output = gr.HTML(label="Directory Metrics")

        load_dir_button.click(
            fn=load_from_dir,
            inputs=dir_path,
            outputs=[summary_output, leaderboard_output],
        )

        # Periodic refresh. The handler previously referenced here
        # (auto_refresh_from_dir) is not defined anywhere in this file and
        # raised NameError while building the app; load_from_dir takes the
        # same input and produces the same two outputs.
        timer = gr.Timer(5.0)
        timer.tick(
            fn=load_from_dir,
            inputs=dir_path,
            outputs=[summary_output, leaderboard_output],
        )

    return demo
if __name__ == "__main__":
    # Script entry point: build the dashboard and serve it on port 7861.
    build_app().launch(server_port=7861)