Spaces:

auto-cap
/

MoE-CAP-Dashboard

Running

App Files Files Community

MoE-CAP-Dashboard / app.py

jing084

Update app.py

e2114e9 verified 9 days ago

raw

history blame

9.53 kB

	#!/usr/bin/env python
	import os
	# Force the Gradio UI language to English.
	# NOTE(review): presumably this has to be set before `gradio` is imported
	# further down, which is why it sits above the other imports -- confirm.
	os.environ["GRADIO_LANGUAGE"] = "en"


	# Fail fast when the results location is not configured. RESULT_DIR is used
	# below as the default value of the "Load from output directory" textbox,
	# whose content load_from_dir() interpolates into an `hf://datasets/...` path.
	RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
	if not RESULT_DIR:
	    raise RuntimeError(
	        "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR before running app.py"
	    )

	import json
	from typing import List, Tuple

	import gradio as gr
	import pandas as pd
	from datasets import load_dataset


	def f2(x):
	    """Round a numeric value to two decimal places.

	    Args:
	        x: Any value.

	    Returns:
	        ``round(float(x), 2)`` when *x* is an ``int`` or ``float``; otherwise
	        *x* itself, unchanged (strings, ``None``, booleans, ...).
	    """
	    # bool is a subclass of int, so it must be excluded explicitly; otherwise
	    # True/False would be displayed as 1.0/0.0 instead of being passed through.
	    if isinstance(x, bool) or not isinstance(x, (int, float)):
	        return x
	    return round(float(x), 2)


	def json_to_row(path: str, metrics: dict) -> dict:
	    """Convert one parsed metrics record into a leaderboard table row.

	    The tables in this file are rendered with ``DataFrame.to_html(escape=False)``
	    so that the model link and the ``<br>`` column headers stay live HTML.
	    Because of that, every string taken from *metrics* is HTML-escaped here;
	    otherwise a crafted result file could inject markup into the page.

	    Args:
	        path: Origin of the record (file path or dataset index label). It is
	            not used to build the row and is kept for interface compatibility.
	        metrics: Mapping of metric names to values, as loaded from JSON.

	    Returns:
	        A dict mapping column headers to cell values. Missing numeric metrics
	        become ``None``; a missing or empty model name becomes
	        ``"unknown-model"``.
	    """
	    # Function-scope imports keep this block self-contained.
	    from html import escape
	    from urllib.parse import quote

	    def text(value):
	        """HTML-escape string cells; pass every other type through."""
	        return escape(value) if isinstance(value, str) else value

	    def pct(x):
	        """Turn a 0-1 fraction into a percentage rounded to two decimals."""
	        return round(x * 100, 2) if isinstance(x, (int, float)) else None

	    model_name = metrics.get("model_name") or "unknown-model"

	    # Prefer an accuracy recomputed from raw counts; fall back to the
	    # reported exact-match value when the counts are absent or unusable.
	    correct = metrics.get("correct")
	    total = metrics.get("total")
	    if isinstance(correct, (int, float)) and isinstance(total, (int, float)) and total > 0:
	        acc = correct / total
	    else:
	        acc = metrics.get("exact_match")

	    # Names shaped like "org/model" are linked to their Hugging Face page.
	    if isinstance(model_name, str) and "/" in model_name:
	        hf_url = f"https://huggingface.co/{quote(model_name, safe='/')}"
	        model_cell = f"<a href='{hf_url}' target='_blank'>{escape(model_name)}</a>"
	    else:
	        model_cell = text(model_name)

	    return {
	        "Model": model_cell,
	        "Dataset": text(metrics.get("dataset", "gsm8k")),
	        "Method": text(metrics.get("method", "")),
	        "Precision": text(metrics.get("precision", "")),
	        "GSM8K<br>E2E(s)": text(f2(metrics.get("gsm8k_e2e_s", None))),
	        "GSM8K<br>bs": text(metrics.get("gsm8k_bs", None)),
	        "GSM8K<br>GPU": text(metrics.get("gpu_type", "")),
	        "GSM8K<br>Accuracy(%)": pct(acc),
	        "GSM8K<br>Decoding T/s": text(f2(metrics.get("decoding_throughput"))),
	        "GSM8K<br>Prefill T/s": text(f2(metrics.get("prefill_tp"))),
	        "GSM8K<br>Prefill<br>S-MBU(%)": pct(metrics.get("prefill_smbu")),
	        "GSM8K<br>Prefill<br>S-MFU(%)": pct(metrics.get("prefill_smfu")),
	        "GSM8K<br>Decoding<br>S-MBU(%)": pct(metrics.get("decoding_smbu")),
	        "GSM8K<br>Decoding<br>S-MFU(%)": pct(metrics.get("decoding_smfu")),
	        "TTFT(s)": text(f2(metrics.get("ttft"))),
	        "TPOT(s)": text(f2(metrics.get("tpot"))),
	    }


	# upload

	def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
	    """Parse uploaded metrics files and render the accumulated leaderboard.

	    Args:
	        files: Uploaded files. Each item may be an object exposing the local
	            path as ``.name`` or a plain path string. May be empty or ``None``.
	        prev_rows: Rows produced by earlier calls; new rows are appended to a
	            copy, so the caller's list is never mutated.

	    Returns:
	        A ``(summary_markdown, table_html, all_rows)`` tuple. When there are no
	        rows at all, the table is a placeholder paragraph and ``all_rows`` is
	        an empty list.
	    """
	    all_rows = list(prev_rows or [])
	    skipped = []

	    for f in files or []:
	        path = getattr(f, "name", f)
	        try:
	            with open(path, "r", encoding="utf-8") as fp:
	                metrics = json.load(fp)
	            all_rows.append(json_to_row(path, metrics))
	        except Exception as exc:
	            # Best effort by design: one unreadable or malformed file must not
	            # abort the whole upload. It is reported instead of silently lost.
	            skipped.append(f"`{os.path.basename(str(path))}` ({type(exc).__name__})")

	    skipped_note = ""
	    if skipped:
	        skipped_note = f" \nSkipped {len(skipped)} unreadable file(s): {', '.join(skipped)}"

	    if not all_rows:
	        return "No files uploaded." + skipped_note, "<p>No files loaded.</p>", []

	    df = pd.DataFrame(all_rows)
	    models_str = _models_markdown(df["Model"].tolist())
	    summary_md = f"Loaded {len(all_rows)} result files. \nModels: {models_str}" + skipped_note
	    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
	    return summary_md, table_html, all_rows


	def _models_markdown(cells) -> str:
	    """Return a comma-separated Markdown list of the distinct models in *cells*."""
	    names = set()
	    for cell in cells:
	        if isinstance(cell, str) and "href" in cell:
	            # Anchor cells look like "<a ...>name</a>"; keep only the link text.
	            _, found, tail = cell.partition(">")
	            names.add(tail.partition("<")[0] if found else cell)
	        else:
	            names.add(cell)

	    links = []
	    # key=str keeps the sort well-defined even if a name is not a string.
	    for name in sorted(names, key=str):
	        if isinstance(name, str) and "/" in name:
	            links.append(f"[{name}](https://huggingface.co/{name})")
	        else:
	            links.append(str(name))
	    return ", ".join(links)


	def load_from_dir(dir_path: str):
	    """Load every JSON metrics record from a Hugging Face dataset repository.

	    Args:
	        dir_path: Path interpolated after ``hf://datasets/``; JSON files are
	            searched recursively below it.

	    Returns:
	        A ``(summary_markdown, table_html)`` tuple. On a load failure, or when
	        no usable record is found, the summary explains why and the table is a
	        placeholder paragraph.
	    """
	    try:
	        # "**/*.json" matches JSON files at any depth. The previous "*/.json"
	        # only matched files literally named ".json" one directory down.
	        pattern = f"hf://datasets/{dir_path}/**/*.json"
	        ds = load_dataset("json", data_files={"train": pattern}, split="train")
	    except Exception as e:
	        # UI boundary: report the failure in the summary instead of raising.
	        return f"Failed to load dataset `{dir_path}`: {e}", "<p>No files loaded.</p>"

	    rows = []
	    for i, example in enumerate(ds):
	        metrics = example
	        if isinstance(example, dict):
	            # Records may nest the payload under "metrics" or "json".
	            metrics = example.get("metrics") or example.get("json") or example
	        if not isinstance(metrics, dict):
	            # json_to_row needs a mapping; skip anything else rather than crash.
	            continue
	        rows.append(json_to_row(f"{dir_path}#{i}", metrics))

	    if not rows:
	        return f"No records found in dataset `{dir_path}`.", "<p>No records found.</p>"

	    df = pd.DataFrame(rows)

	    # Recover plain model names: anchor cells look like "<a ...>name</a>".
	    raw_models = set()
	    for cell in df["Model"].tolist():
	        name = cell
	        if isinstance(cell, str) and "href" in cell:
	            _, found, tail = cell.partition(">")
	            if found:
	                name = tail.partition("<")[0]
	        raw_models.add(name)

	    # key=str keeps the sort well-defined even if a name is not a string.
	    links = [
	        f"[{name}](https://huggingface.co/{name})"
	        if isinstance(name, str) and "/" in name
	        else str(name)
	        for name in sorted(raw_models, key=str)
	    ]
	    models_str = ", ".join(links)

	    summary_md = (
	        f"Loaded {len(rows)} result files from dataset `{dir_path}`. \n"
	        f"Models: {models_str}"
	    )
	    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
	    return summary_md, table_html


	# Gradio UI

	def build_app() -> gr.Blocks:
	    """Assemble the MoE-CAP dashboard UI.

	    The page has a static description of the task and table columns, a textbox
	    pre-filled with ``RESULT_DIR``, a button that loads results through
	    ``load_from_dir``, and a 5-second timer that reloads them the same way.

	    Returns:
	        The ``gr.Blocks`` application, not yet launched.
	    """
	    row_css = """
	    .gradio-container table.metrics-table th,
	    .gradio-container table.metrics-table td {
	        padding-top: 10px;
	        padding-bottom: 10px;
	        padding-left: 8px;
	        padding-right: 8px;
	        border: 1px solid #e5e7eb;
	    }
	    .gradio-container table.metrics-table {
	        border-collapse: collapse;
	        width: 100%;
	    }
	    """

	    with gr.Blocks(title="MoE-CAP Dashboard", css=row_css) as demo:
	        gr.Markdown("# MoE-CAP Dashboard")

	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown(
	                    "### Tasks\n"
	                    "- Mathematics Problem-Solving Performance — "
	                    # arXiv identifiers use a dot (2110.14168); the hyphenated
	                    # form used before does not resolve.
	                    "[GSM8K](https://arxiv.org/abs/2110.14168)\n\n"
	                    "### Columns and Metrics\n"
	                    "- Model \n"
	                    "- Dataset \n"
	                    "- Method \n"
	                    "- Precision \n"
	                    "- GSM8K E2E (s) \n"
	                    "- GSM8K Batch Size \n"
	                    "- GPU Type \n"
	                    "- GSM8K Accuracy (%) \n"
	                    "- Decoding Throughput (tokens/s) \n"
	                    "- Prefill Throughput (tokens/s) \n"
	                    "- Prefill S-MBU (%) \n"
	                    "- Prefill S-MFU (%) \n"
	                    "- Decoding S-MBU (%) \n"
	                    "- Decoding S-MFU (%) \n"
	                    "- TTFT (s) \n"
	                    "- TPOT (s)"
	                )

	            with gr.Column(scale=1):
	                # manual upload
	                # files_input = gr.Files(
	                # label="Upload `cap_metrics_*.json` files",
	                # file_types=[".json"],
	                # file_count="multiple",
	                # )
	                # run_button = gr.Button("Parse Uploaded Files")

	                dir_path = gr.Textbox(
	                    label="Load from output directory",
	                    value=RESULT_DIR,
	                    lines=1,
	                )
	                load_dir_button = gr.Button("Load from directory")

	        # upload_summary = gr.Markdown(label="Upload Summary")
	        # upload_table = gr.HTML(label="Upload Metrics")

	        summary_output = gr.Markdown(label="Directory Summary")
	        leaderboard_output = gr.HTML(label="Directory Metrics")

	        # run_button.click(
	        # fn=build_leaderboard_from_files,
	        # inputs=files_input,
	        # outputs=[upload_summary, upload_table],
	        # )

	        load_dir_button.click(
	            fn=load_from_dir,
	            inputs=dir_path,
	            outputs=[summary_output, leaderboard_output],
	        )

	        # Periodic refresh. The handler previously named here,
	        # `auto_refresh_from_dir`, is not defined in this file, so building
	        # the UI raised NameError. load_from_dir takes the same input and
	        # returns the same two outputs, so the timer calls it directly.
	        timer = gr.Timer(5.0)
	        timer.tick(
	            fn=load_from_dir,
	            inputs=dir_path,
	            outputs=[summary_output, leaderboard_output],
	        )

	    return demo


	# Script entry point: build the dashboard and serve it.
	if __name__ == "__main__":
	    app = build_app()
	    # The server port is fixed at 7861 here.
	    # NOTE(review): confirm the hosting environment expects this port.
	    app.launch(server_port=7861)