Spaces:

auto-cap
/

MoE-CAP-Dashboard

Running

App Files Files Community

AppleSwing committed on 15 days ago

Commit

f041cb6

verified ·

1 Parent(s): 3094acc

Update app.py

Browse files

Files changed (1) hide show

app.py +318 -321

app.py CHANGED Viewed

@@ -65,340 +65,337 @@ def json_to_row(path: str, metrics: dict) -> dict:
         "E2E(s)": f2(e2e_s),
         "Batch size": batch_size,
         "GPU": gpu_type,
-        "Cost": f2(cost),
-        "Accuracy(%)": pct(acc),
     }
     return row
-def load_all_results() -> Tuple[pd.DataFrame, List[str]]:
-    ds = load_dataset(RESULT_DIR, split="train", token=True)
-    rows_list = []
-    file_paths = []
-    for item in ds:
-        path = item["path"]
-        content = item["content"]
-        metrics = json.loads(content)
-        row = json_to_row(path, metrics)
-        rows_list.append(row)
-        file_paths.append(path)
-    df = pd.DataFrame(rows_list)
-    return df, file_paths
-def update_table(
-    dataset_filter,
-    method_filter,
-    precision_filter,
-    model_type_filter,
-    gpu_filter
 ):
-    df_all, all_paths = load_all_results()
-    df_filtered = df_all.copy()
-    if dataset_filter and dataset_filter != "All":
-        df_filtered = df_filtered[df_filtered["Dataset"] == dataset_filter]
-    if method_filter and method_filter != "All":
-        df_filtered = df_filtered[df_filtered["Method"] == method_filter]
-    if precision_filter and precision_filter != "All":
-        df_filtered = df_filtered[df_filtered["Precision"] == precision_filter]
-    if model_type_filter and model_type_filter != "All":
-        df_filtered = df_filtered[df_filtered["Model type"] == model_type_filter]
-    if gpu_filter and gpu_filter != "All":
-        df_filtered = df_filtered[df_filtered["GPU"] == gpu_filter]
-    return df_filtered
-def get_unique_values(column_name: str) -> list:
-    df_all, _ = load_all_results()
-    unique_vals = sorted(df_all[column_name].dropna().unique().tolist())
-    return ["All"] + unique_vals
-# FIXED CSS with proper background colors and contrast
-custom_css = """
-/* Force white background for the entire app */
-body, .gradio-container {
-    background-color: #ffffff !important;
-    color: #000000 !important;
-}
-/* Header Styling */
-.header-container {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    padding: 30px 40px;
-    margin-bottom: 30px;
-    border-radius: 12px;
-    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
-}
-.header-container h1 {
-    color: #ffffff !important;
-    font-size: 32px;
-    font-weight: 700;
-    margin: 0;
-    text-shadow: 0 2px 4px rgba(0,0,0,0.2);
-}
-.header-container p {
-    color: #f0f0f0 !important;
-    font-size: 16px;
-    margin: 10px 0 0 0;
-    opacity: 0.95;
-}
-/* Main Layout Container */
-#main-container {
-    display: flex;
-    gap: 20px;
-    height: calc(100vh - 200px);
-    min-height: 600px;
-}
-/* Sidebar Filters */
-#sidebar {
-    width: 350px;
-    flex-shrink: 0;
-    background-color: #ffffff !important;
-    border-radius: 12px;
-    padding: 20px;
-    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-    overflow-y: auto;
-    border: 1px solid #e0e0e0;
-}
-#sidebar h3 {
-    color: #333333 !important;
-    font-size: 20px;
-    font-weight: 600;
-    margin-bottom: 20px;
-    padding-bottom: 10px;
-    border-bottom: 2px solid #667eea;
-}
-.filter-group {
-    background-color: #f8f9fa !important;
-    padding: 15px;
-    border-radius: 8px;
-    margin-bottom: 15px;
-    border: 1px solid #e9ecef;
-}
-.filter-group label {
-    color: #333333 !important;
-    font-weight: 500;
-    font-size: 14px;
-    display: block;
-    margin-bottom: 8px;
-}
-/* Dropdown styling */
-.filter-group select,
-.filter-group input {
-    background-color: #ffffff !important;
-    color: #000000 !important;
-    border: 1px solid #ced4da !important;
-    border-radius: 6px;
-    padding: 8px 12px;
-}
-/* Table Container */
-#table-container {
-    flex: 1;
-    background-color: #ffffff !important;
-    border-radius: 12px;
-    padding: 0;
-    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-    overflow: hidden;
-    display: flex;
-    flex-direction: column;
-    border: 1px solid #e0e0e0;
-}
-/* CRITICAL: Fixed height table wrapper with scrolling */
-.table-wrapper {
-    flex: 1;
-    overflow-y: auto !important;
-    overflow-x: auto !important;
-    max-height: calc(100vh - 280px) !important;
-    min-height: 400px;
-}
-/* Table Styling */
-table {
-    width: 100%;
-    border-collapse: collapse;
-    background-color: #ffffff !important;
-    color: #000000 !important;
-}
-/* Sticky header */
-thead {
-    position: sticky;
-    top: 0;
-    z-index: 10;
-    background-color: #667eea !important;
-}
-thead th {
-    background-color: #667eea !important;
-    color: #ffffff !important;
-    padding: 16px 12px;
-    text-align: left;
-    font-weight: 600;
-    font-size: 14px;
-    border-bottom: 2px solid #5568d3;
-    white-space: nowrap;
-}
-tbody tr {
-    background-color: #ffffff !important;
-    border-bottom: 1px solid #e9ecef;
-    transition: background-color 0.2s;
-}
-tbody tr:nth-child(even) {
-    background-color: #f8f9fa !important;
-}
-tbody tr:hover {
-    background-color: #e7f1ff !important;
-}
-tbody td {
-    padding: 12px;
-    color: #333333 !important;
-    font-size: 13px;
-    border-bottom: 1px solid #e9ecef;
-}
-/* Links in table */
-tbody td a {
-    color: #667eea !important;
-    text-decoration: none;
-    font-weight: 500;
-}
-tbody td a:hover {
-    color: #764ba2 !important;
-    text-decoration: underline;
-}
-/* Hide Gradio footer */
-footer {
-    display: none !important;
-}
-/* Ensure all text elements have proper contrast */
-* {
-    color: inherit;
-}
-label, p, span, div {
-    color: #333333 !important;
-}
-/* Responsive Design */
-@media (max-width: 1200px) {
-    #main-container {
-        flex-direction: column;
-        height: auto;
-    }
-    #sidebar {
-        width: 100%;
-        margin-bottom: 20px;
     }
-    .table-wrapper {
-        max-height: 600px !important;
     }
-}
-"""
-with gr.Blocks(css=custom_css, title="MoE-CAP Dashboard") as demo:
-    # Header
-    with gr.Row():
-        gr.HTML("""
-            <div class="header-container">
-                <h1>🚀 MoE-CAP Results Dashboard</h1>
-                <p>Explore and analyze model evaluation results across different datasets, methods, and configurations</p>
-            </div>
-        """)
-    # Main layout with sidebar and table
-    with gr.Row(elem_id="main-container"):
-        # Sidebar with filters
-        with gr.Column(elem_id="sidebar", scale=0):
-            gr.HTML("<h3>🔍 Filter Options</h3>")
-            with gr.Group(elem_classes="filter-group"):
-                gr.HTML("<label>📊 Dataset</label>")
-                dataset_dropdown = gr.Dropdown(
-                    choices=get_unique_values("Dataset"),
-                    value="All",
-                    label="",
-                    interactive=True
                 )
-            with gr.Group(elem_classes="filter-group"):
-                gr.HTML("<label>⚙️ Method</label>")
-                method_dropdown = gr.Dropdown(
-                    choices=get_unique_values("Method"),
-                    value="All",
-                    label="",
-                    interactive=True
                 )
-            with gr.Group(elem_classes="filter-group"):
-                gr.HTML("<label>🎯 Precision</label>")
-                precision_dropdown = gr.Dropdown(
-                    choices=get_unique_values("Precision"),
-                    value="All",
-                    label="",
-                    interactive=True
                 )
-            with gr.Group(elem_classes="filter-group"):
-                gr.HTML("<label>🤖 Model Type</label>")
-                model_type_dropdown = gr.Dropdown(
-                    choices=get_unique_values("Model type"),
-                    value="All",
-                    label="",
-                    interactive=True
                 )
-            with gr.Group(elem_classes="filter-group"):
-                gr.HTML("<label>🖥️ GPU</label>")
-                gpu_dropdown = gr.Dropdown(
-                    choices=get_unique_values("GPU"),
-                    value="All",
-                    label="",
-                    interactive=True
                 )
-        # Table container with proper scrolling
-        with gr.Column(elem_id="table-container", scale=1):
-            gr.HTML('<div class="table-wrapper">')
-            dataframe_output = gr.Dataframe(
-                value=update_table("All", "All", "All", "All", "All"),
-                interactive=False,
-                wrap=False,
-                datatype=["html"] + ["str"] * 9,
-                column_widths=["25%", "10%", "10%", "10%", "10%", "8%", "8%", "8%", "8%", "10%"]
-            )
-            gr.HTML('</div>')
-    # Update table when filters change
-    for dropdown in [dataset_dropdown, method_dropdown, precision_dropdown, model_type_dropdown, gpu_dropdown]:
-        dropdown.change(
-            fn=update_table,
-            inputs=[dataset_dropdown, method_dropdown, precision_dropdown, model_type_dropdown, gpu_dropdown],
-            outputs=dataframe_output
         )
 if __name__ == "__main__":
-    demo.launch(share=False)

         "E2E(s)": f2(e2e_s),
         "Batch size": batch_size,
         "GPU": gpu_type,
+        "Accuracy(%)": pct(acc),
+        "Cost($)": cost,
+        "Decoding T/s": f2(metrics.get("decoding_throughput")),
+        "Prefill T/s": f2(metrics.get("prefill_tp")),
+        "Prefill<br>S-MBU(%)": pct(metrics.get("prefill_smbu")),
+        "Prefill<br>S-MFU(%)": pct(metrics.get("prefill_smfu")),
+        "Decoding<br>S-MBU(%)": pct(metrics.get("decoding_smbu")),
+        "Decoding<br>S-MFU(%)": pct(metrics.get("decoding_smfu")),
+        "TTFT(s)": f2(metrics.get("ttft")),
+        "TPOT(s)": f2(metrics.get("tpot")),
     }
     return row
def _model_links_markdown(cells) -> str:
    """Return a comma-separated Markdown list of the distinct models in *cells*.

    A cell holding an HTML anchor is reduced to its link text; any other cell
    is used as is. Names containing ``/`` are linked to
    ``https://huggingface.co/<name>``, the rest are emitted as plain text.
    """
    names = set()
    for cell in cells:
        name = cell
        if isinstance(cell, str) and "href" in cell:
            try:
                # Text between the first ">" and the following "<".
                name = cell.split(">", 1)[1].split("<", 1)[0]
            except IndexError:
                # "href" present but no ">" to split on: keep the raw cell.
                name = cell
        names.add(name)
    return ", ".join(
        f"[{name}](https://huggingface.co/{name})"
        if isinstance(name, str) and "/" in name
        else str(name)
        for name in sorted(names)
    )


def build_leaderboard_from_files(files: List[gr.File], prev_rows: list | None = None):
    """Merge uploaded result files into the rows already on the leaderboard.

    Each entry of *files* is opened as UTF-8 JSON and turned into a table row
    with ``json_to_row``. Files that cannot be read, parsed or converted are
    skipped (best effort) and reported on stdout instead of aborting the
    whole upload.

    Args:
        files: Uploaded files, either objects exposing the path as ``.name``
            or plain path strings. ``None`` or an empty list adds nothing.
        prev_rows: Row dicts accumulated by earlier calls. Not mutated.

    Returns:
        A ``(summary_md, table_html, rows)`` tuple: a Markdown summary of the
        loaded models, the table rendered as HTML with the ``metrics-table``
        class, and the combined list of rows. When there are no rows at all,
        placeholder strings and an empty list are returned.
    """
    rows = list(prev_rows) if prev_rows else []
    for f in files or []:
        path = f if isinstance(f, str) else f.name
        try:
            with open(path, "r", encoding="utf-8") as fp:
                metrics = json.load(fp)
            rows.append(json_to_row(path, metrics))
        except Exception as exc:
            # Deliberately broad: one bad upload must not discard the others.
            print(f"Skipping {path}: {exc}")

    if not rows:
        return "No files uploaded.", "<p>No files loaded.</p>", []

    df = pd.DataFrame(rows)
    models_str = _model_links_markdown(df["Model"].tolist())
    summary_md = f"**Loaded {len(rows)} result files.** \n**Models:** {models_str}"
    # escape=False keeps the HTML anchors stored in the cells clickable.
    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
    return summary_md, table_html, rows
+def load_from_dir(
+    dir_path: str,
+    selected_tasks: List[str] | None = None,
+    selected_frameworks: List[str] | None = None,
+    selected_model_types: List[str] | None = None,
+    selected_precisions: List[str] | None = None,
+    force_refresh: bool = False,
 ):
+    try:
+        pattern = f"hf://datasets/{dir_path}/**/*.json"
+        dl_mode = "force_redownload" if force_refresh else None
+        print(f"Fetching from {pattern} (mode={dl_mode})...")
+        ds = load_dataset(
+            "json",
+            data_files={"train": pattern},
+            split="train",
+            download_mode=dl_mode,
+        )
+    except Exception as e:
+        empty_html = "<p>No files loaded or Dataset not found.</p>"
+        return empty_html
+    rows = []
+    for i, example in enumerate(ds):
+        if isinstance(example, dict):
+            metrics = example.get("metrics") or example.get("json") or example
+        else:
+            metrics = example
+        rows.append(json_to_row(f"{dir_path}#{i}", metrics))
+    if not rows:
+        empty_html = "<p>No records found.</p>"
+        return empty_html
+    df = pd.DataFrame(rows)
+    # Dataset filter
+    if selected_tasks is not None:
+        lower_selected = [x.lower() for x in selected_tasks]
+        df = df[df["Dataset"].astype(str).str.lower().isin(lower_selected)]
+    # Inference framework filter (Method)
+    if selected_frameworks is not None:
+        lower_selected = [str(x).lower() for x in selected_frameworks]
+        df = df[df["Method"].astype(str).str.lower().isin(lower_selected)]
+    # Model type filter
+    if selected_model_types is not None:
+        lower_selected = [str(x).lower() for x in selected_model_types]
+        df = df[df["Model type"].astype(str).str.lower().isin(lower_selected)]
+    # Precision filter
+    if selected_precisions is not None:
+        lower_selected = [str(x).lower() for x in selected_precisions]
+        df = df[df["Precision"].astype(str).str.lower().isin(lower_selected)]
+    if df.empty:
+        empty_html = "<p>No records found.</p>"
+        return empty_html
+    df = df.fillna("-")
+    raw_models = set()
+    for cell in df["Model"].tolist():
+        if isinstance(cell, str) and "href" in cell:
+            try:
+                name = cell.split(">", 1)[1].split("<", 1)[0]
+            except Exception:
+                name = cell
+        else:
+            name = cell
+        raw_models.add(name)
+    links = []
+    for name in sorted(raw_models):
+        if isinstance(name, str) and "/" in name:
+            hf_url = f"https://huggingface.co/{name}"
+            links.append(f"[{name}]({hf_url})")
+        else:
+            links.append(str(name))
+    models_str = ", ".join(links)
+    # summary_md = (
+    #     f"**Loaded {len(df)} result files from dataset `{dir_path}`.** \n"
+    #     f"**Models:** {models_str}"
+    # )
+    table_html = df.to_html(escape=False, index=False, classes="metrics-table")
+    return table_html
def auto_refresh_from_dir(
    dir_path: str,
    selected_tasks: List[str] | None = None,
    selected_frameworks: List[str] | None = None,
    selected_model_types: List[str] | None = None,
    selected_precisions: List[str] | None = None,
):
    """Call ``load_from_dir`` with the same filters and ``force_refresh=True``.

    Returns whatever ``load_from_dir`` returns for those arguments.
    """
    current_filters = {
        "selected_tasks": selected_tasks,
        "selected_frameworks": selected_frameworks,
        "selected_model_types": selected_model_types,
        "selected_precisions": selected_precisions,
    }
    return load_from_dir(dir_path, force_refresh=True, **current_filters)
+# Gradio UI
def build_app() -> gr.Blocks:
    """Assemble the MoE-CAP dashboard and wire up its events.

    Layout: a title, then a row with a Markdown legend (tasks and metric
    columns) on the left and four checkbox filters on the right, followed by
    an HTML component that shows the results table.

    Events: page load and a 60-second timer call ``auto_refresh_from_dir``;
    changing any filter calls ``load_from_dir``. All of them read the same
    inputs and write to the results table.

    Returns:
        The configured ``gr.Blocks`` app (not launched).
    """
    row_css = """
    .gradio-container table.metrics-table th,
    .gradio-container table.metrics-table td {
        padding-top: 10px;
        padding-bottom: 10px;
        padding-left: 8px;
        padding-right: 8px;
        border: 1px solid #e5e7eb;
    }
    .gradio-container table.metrics-table {
        border-collapse: collapse;
        width: 100%;
    }
    """
    with gr.Blocks(title="MoE-CAP Dashboard", css=row_css) as demo:
        gr.Markdown("# MoE-CAP Dashboard")
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(
                    "### Tasks\n"
                    "- Mathematics Problem-Solving Performance — "
                    # arXiv ids use a dot; the previous "2110-14168" did not resolve.
                    "[**GSM8K**](https://arxiv.org/abs/2110.14168)\n\n"
                    "- Long-Context Understanding — "
                    "[**LongBench**](https://arxiv.org/abs/2412.15204)\n"
                    "- Massive Multitask Language Understanding — "
                    "[**MMLU**](https://arxiv.org/abs/2009.03300)\n"
                    "- Mathematical Reasoning — "
                    "[**NuminaMath**](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf)\n"
                    "- Extreme Long-Context Evaluation — "
                    "[**RULER**](https://arxiv.org/abs/2404.06654)\n\n"
                    "### Columns and Metrics\n"
                    "- End-to-End Latency (s)  \n"
                    "- Batch Size  \n"
                    "- GPU Type  \n"
                    "- Accuracy (%)  \n"
                    "- Cost ($)  \n"
                    "- Decoding Throughput (tokens/s)  \n"
                    "- Prefill Throughput (tokens/s)  \n"
                    "- Prefill S-MBU (%)  \n"
                    "- Prefill S-MFU (%)  \n"
                    "- Decoding S-MBU (%)  \n"
                    "- Decoding S-MFU (%)  \n"
                    "- TTFT (s)  \n"
                    "- TPOT (s)"
                )
            with gr.Column(scale=1):
                dir_path = gr.State(RESULT_DIR)
                # 1) Tasks filter: (label shown, value passed to the callback)
                task_filter = gr.CheckboxGroup(
                    label="Tasks",
                    choices=[
                        ("GSM8K", "gsm8k"),
                        ("LongBench", "longbench"),
                        ("MMLU", "mmlu"),
                        ("NuminaMath", "numinamath"),
                        ("RULER", "ruler"),
                    ],
                    value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"],
                )
                # 2) Inference frameworks filter
                framework_filter = gr.CheckboxGroup(
                    label="Inference frameworks",
                    choices=["sglang", "vllm"],
                    value=["sglang", "vllm"],
                )
                # 3) Model types filter
                model_type_filter = gr.CheckboxGroup(
                    label="Model types",
                    choices=["instruct", "thinking"],
                    value=["instruct", "thinking"],
                )
                # 4) Precision filter
                precision_filter = gr.CheckboxGroup(
                    label="Precision",
                    choices=["bfloat16", "fp8"],
                    value=["bfloat16", "fp8"],
                )
        leaderboard_output = gr.HTML(label="Directory Metrics")

        filters = [task_filter, framework_filter, model_type_filter, precision_filter]
        # Every event reads the same inputs and updates the same table.
        event_inputs = [dir_path, *filters]
        event_outputs = [leaderboard_output]

        # Initial page load goes through the force-refresh wrapper.
        demo.load(
            fn=auto_refresh_from_dir,
            inputs=event_inputs,
            outputs=event_outputs,
        )
        # Filter changes re-render without force_refresh.
        for checkbox_group in filters:
            checkbox_group.change(
                fn=load_from_dir,
                inputs=event_inputs,
                outputs=event_outputs,
            )
        # Periodic refresh every 60 seconds.
        timer = gr.Timer(60.0)
        timer.tick(
            fn=auto_refresh_from_dir,
            inputs=event_inputs,
            outputs=event_outputs,
        )
    return demo
 if __name__ == "__main__":
     # Script entry point: build the dashboard and start serving it.
     build_app().launch()