AppleSwing committed on
Commit
9a48e69
·
verified ·
1 Parent(s): a67de47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -176
app.py CHANGED
@@ -7,7 +7,6 @@ os.environ["GRADIO_LANGUAGE"] = "en"
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
10
- # For testing purposes, you can uncomment the line below to set a dummy dir or keep the raise
11
  # RESULT_DIR = "generic_result_dir"
12
  raise RuntimeError(
13
  "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
@@ -33,13 +32,6 @@ def normalize(val, vmin, vmax, baseline=20):
33
  return baseline + (val - vmin) / (vmax - vmin) * (100 - baseline)
34
 
35
 
36
- def normalize_reversed(val, vmin, vmax, baseline=20):
37
- """Normalize value (reversed - lower is better) to baseline-100 range."""
38
- if vmax == vmin:
39
- return baseline + 40
40
- return baseline + (vmax - val) / (vmax - vmin) * (100 - baseline)
41
-
42
-
43
  def normalize_cost(val, max_tick, baseline=20):
44
  """Normalize cost (lower is better)."""
45
  if max_tick == 0:
@@ -118,7 +110,6 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
118
  # Extract metrics from selected rows
119
  data = {}
120
  for row in selected_rows_data:
121
- # Extract model name from HTML or use as-is
122
  model_name = row.get('Model', 'Unknown')
123
  if isinstance(model_name, str) and 'href' in model_name:
124
  try:
@@ -126,23 +117,19 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
126
  except:
127
  pass
128
 
129
- # Format legend name: extract name after "/" and add method
130
  method = row.get('Method', '')
131
  if isinstance(model_name, str) and '/' in model_name:
132
- legend_name = model_name.split('/')[-1] # Get part after last /
133
  else:
134
  legend_name = str(model_name)
135
 
136
- # Add method suffix
137
  if method and method not in ['Unknown', '-', '']:
138
  legend_name = f"{legend_name}-{method}"
139
 
140
- # Get metrics
141
  acc = row.get('Accuracy(%)', 0)
142
  cost = row.get('Cost($)', 0)
143
  throughput = row.get('Decoding T/s', 0)
144
 
145
- # Convert to float if needed
146
  try:
147
  acc = float(acc) if acc not in [None, '-', ''] else 0
148
  cost = float(cost) if cost not in [None, '-', ''] else 0
@@ -151,12 +138,12 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
151
  acc, cost, throughput = 0, 0, 0
152
 
153
  data[legend_name] = {
154
- 'accuracy': acc / 100.0 if acc > 1 else acc, # Normalize to 0-1
155
  'cost': cost,
156
  'throughput': throughput
157
  }
158
 
159
- # Get min/max for normalization
160
  throughputs = [v['throughput'] for v in data.values()]
161
  costs = [v['cost'] for v in data.values()]
162
  accs = [v['accuracy'] for v in data.values()]
@@ -177,7 +164,7 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
177
  normalize_cost(values['cost'], cost_max, baseline),
178
  normalize(values['accuracy'], acc_min, acc_max, baseline)
179
  ]
180
- norm_vals += [norm_vals[0]] # Close the loop
181
 
182
  hovertext = [
183
  f"Throughput: {raw_vals[0]:.2f} T/s",
@@ -211,7 +198,7 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
211
  ),
212
  angularaxis=dict(
213
  tickfont=dict(size=14),
214
- rotation=90, # Rotate so top is 12 o'clock
215
  direction='clockwise'
216
  ),
217
  ),
@@ -235,7 +222,6 @@ def json_to_row(path: str, metrics: dict) -> dict:
235
  model_name = "unknown-model"
236
 
237
  dataset = metrics.get("dataset", "Unknown")
238
-
239
  method = metrics.get("method", "Unknown")
240
  precision = metrics.get("precision", "Unknown")
241
  model_type = metrics.get("model_type", "Unknown")
@@ -297,113 +283,89 @@ def load_from_dir(
297
  try:
298
  pattern = f"hf://datasets/{dir_path}/**/*.json"
299
  dl_mode = "force_redownload" if force_refresh else None
300
-
301
- print(f"Fetching from {pattern} (mode={dl_mode})...")
302
- ds = load_dataset(
303
- "json",
304
- data_files={"train": pattern},
305
- split="train",
306
- download_mode=dl_mode,
307
- )
308
- except Exception as e:
309
- empty_html = "<p>No files loaded or Dataset not found.</p>"
310
- return empty_html, []
311
 
312
  rows = []
313
  for i, example in enumerate(ds):
314
- if isinstance(example, dict):
315
- metrics = example.get("metrics") or example.get("json") or example
316
- else:
317
- metrics = example
318
  rows.append(json_to_row(f"{dir_path}#{i}", metrics))
319
 
320
  if not rows:
321
- empty_html = "<p>No records found.</p>"
322
- return empty_html, []
323
 
324
  df = pd.DataFrame(rows)
325
 
326
- # Filters
327
- if selected_tasks is not None:
328
- lower_selected = [x.lower() for x in selected_tasks]
329
- df = df[df["Dataset"].astype(str).str.lower().isin(lower_selected)]
330
-
331
- if selected_frameworks is not None:
332
- lower_selected = [str(x).lower() for x in selected_frameworks]
333
- df = df[df["Method"].astype(str).str.lower().isin(lower_selected)]
334
-
335
- if selected_model_types is not None:
336
- lower_selected = [str(x).lower() for x in selected_model_types]
337
- df = df[df["Model type"].astype(str).str.lower().isin(lower_selected)]
338
-
339
- if selected_precisions is not None:
340
- lower_selected = [str(x).lower() for x in selected_precisions]
341
- df = df[df["Precision"].astype(str).str.lower().isin(lower_selected)]
342
-
343
  if search_keyword and search_keyword.strip():
344
- keyword_lower = search_keyword.strip().lower()
345
- mask = df.astype(str).apply(lambda row: row.str.lower().str.contains(keyword_lower).any(), axis=1)
346
  df = df[mask]
347
 
348
  if df.empty:
349
- empty_html = "<p>No records found.</p>"
350
- return empty_html, []
351
 
352
  df = df.fillna("-")
353
-
354
- # Insert row number column at the beginning
355
  df.insert(0, 'Row #', range(len(df)))
356
 
357
- # Create HTML table
358
  table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
359
  df_without_rownum = df.drop('Row #', axis=1)
360
- df_dict = df_without_rownum.to_dict('records')
361
- return table_html, df_dict
362
 
363
 
364
- def auto_refresh_from_dir(
365
- dir_path: str,
366
- selected_tasks: List[str] | None = None,
367
- selected_frameworks: List[str] | None = None,
368
- selected_model_types: List[str] | None = None,
369
- selected_precisions: List[str] | None = None,
370
- search_keyword: str = "",
371
- ):
372
- return load_from_dir(
373
- dir_path,
374
- selected_tasks=selected_tasks,
375
- selected_frameworks=selected_frameworks,
376
- selected_model_types=selected_model_types,
377
- selected_precisions=selected_precisions,
378
- search_keyword=search_keyword,
379
- force_refresh=True,
380
- )
381
 
382
 
383
  def parse_and_generate_plot(df_data: list, indices_str: str):
384
- """Parse comma-separated indices and generate radar plot."""
385
  if not indices_str or not indices_str.strip():
386
  return generate_radar_plot([])
387
-
388
  try:
389
- # Parse comma-separated indices
390
  indices = [int(idx.strip()) for idx in indices_str.split(',') if idx.strip()]
391
- # Limit to 3 rows
392
- indices = indices[:3]
393
- # Get selected rows
394
- selected_rows = [df_data[i] for i in indices if 0 <= i < len(df_data)]
395
- return generate_radar_plot(selected_rows)
396
- except (ValueError, IndexError):
397
  return generate_radar_plot([])
398
 
399
 
400
- # Gradio UI
401
-
402
  def build_app() -> gr.Blocks:
403
  row_css = """
404
  body { background-color: #f5f7fa !important; }
405
 
406
- /* Row number column styling */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  .metrics-table th:first-child, .metrics-table td:first-child {
408
  width: 60px !important; text-align: center !important;
409
  padding: 8px !important; font-weight: 600 !important;
@@ -414,68 +376,43 @@ def build_app() -> gr.Blocks:
414
  border-radius: 6px; border: 2px solid #e1e4e8 !important;
415
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06); margin-bottom: 16px;
416
  }
417
- .search-box .block { background: transparent !important; border: none !important; padding: 0 !important; }
418
- .search-box label span { color: #24292e !important; font-weight: 600; font-size: 14px; margin-bottom: 8px; }
419
- .search-box input.scroll-hide {
420
- background-color: white !important; color: #24292e !important;
421
- border: 1.5px solid #e1e4e8 !important; border-radius: 4px !important;
422
- padding: 10px !important; box-shadow: none !important;
423
- }
424
- .search-box input.scroll-hide:focus { border-color: #0366d6 !important; outline: none !important; }
425
-
426
  .gradio-container { max-width: 100% !important; padding: 20px !important; background-color: #f5f7fa !important; }
427
- .gradio-container .block, .gradio-container .form, .gradio-container .gr-box, .gradio-container .gr-input {
428
- background-color: white !important; border-color: #e1e4e8 !important;
429
- }
430
  .gradio-container label, .gradio-container p, .gradio-container span, .gradio-container div { color: #24292e !important; }
431
 
432
- /* Table styling */
433
- .gradio-container table.metrics-table th, .gradio-container table.metrics-table td {
434
- padding: 10px 14px; border: 1.5px solid #e1e4e8; white-space: nowrap;
435
- font-size: 13px; text-align: left; color: #24292e !important;
436
- }
437
  .gradio-container table.metrics-table th {
438
  background: linear-gradient(to bottom, #fafbfc, #f6f8fa);
439
  font-weight: 600; position: sticky; top: 0; z-index: 10;
440
  border-bottom: 2px solid #d1d5da;
441
  }
442
- .gradio-container table.metrics-table tbody tr:nth-child(even) { background-color: #f6f8fa; }
443
- .gradio-container table.metrics-table tbody tr:hover { background-color: #e1e4e8; }
444
  .gradio-container table.metrics-table { border-collapse: collapse; width: 100%; background: white; }
445
- .gradio-container table.metrics-table a { color: #0366d6 !important; text-decoration: none; }
446
- .gradio-container table.metrics-table a:hover { text-decoration: underline; }
447
-
448
- /* Allow plot container to expand */
449
- .gradio-container .plot-container { width: 100% !important; }
450
 
 
451
  .table-container {
452
- overflow-x: auto; overflow-y: auto; max-height: 75vh;
 
453
  border: 2px solid #e1e4e8; border-radius: 6px;
454
  background: white; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
455
  }
 
456
  .filter-section {
457
  background: white !important; padding: 0 !important; border-radius: 6px;
458
  border: 2px solid #e1e4e8 !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
459
  }
460
- .filter-section .wrap, .filter-section .block, .filter-section .container, .filter-section .group { background: transparent !important; }
461
  .filter-section .wrap { padding: 20px !important; }
462
- .gradio-container .accordion {
463
- background: white !important; border: 2px solid #e1e4e8 !important;
464
- border-radius: 6px !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
465
- }
466
  .info-section { padding: 16px; background: white !important; }
467
- .info-section a { color: #0366d6 !important; }
468
  .gradio-container h1 { color: #24292e !important; font-weight: 700; margin-bottom: 24px; }
469
  .gradio-container h3 { color: #24292e !important; font-weight: 600; margin-bottom: 16px; }
470
- .gradio-container input[type="checkbox"] { accent-color: #0366d6 !important; }
471
  """
472
 
473
  with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
474
  gr.Markdown("# MoE-CAP Dashboard")
475
 
476
  with gr.Row():
477
- # Left side - Filters (narrower)
478
- with gr.Column(scale=2):
 
479
  with gr.Group(elem_classes="search-box"):
480
  search_input = gr.Textbox(
481
  label="πŸ” Search",
@@ -485,33 +422,23 @@ def build_app() -> gr.Blocks:
485
 
486
  with gr.Group(elem_classes="filter-section"):
487
  gr.Markdown("### πŸŽ›οΈ Filters")
488
-
489
  dir_path = gr.State(RESULT_DIR)
490
 
491
  task_filter = gr.CheckboxGroup(
492
  label="πŸ“Š Tasks",
493
- choices=[
494
- ("GSM8K", "gsm8k"),
495
- ("LongBench", "longbench"),
496
- ("MMLU", "mmlu"),
497
- ("NuminaMath", "numinamath"),
498
- ("RULER", "ruler")
499
- ],
500
  value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
501
  )
502
-
503
  framework_filter = gr.CheckboxGroup(
504
  label="βš™οΈ Inference Frameworks",
505
  choices=["sglang", "vllm"],
506
  value=["sglang", "vllm"],
507
  )
508
-
509
  model_type_filter = gr.CheckboxGroup(
510
  label="πŸ€– Model Types",
511
  choices=["instruct", "thinking"],
512
  value=["instruct", "thinking"],
513
  )
514
-
515
  precision_filter = gr.CheckboxGroup(
516
  label="🎯 Precision",
517
  choices=["bfloat16", "fp8"],
@@ -526,7 +453,6 @@ def build_app() -> gr.Blocks:
526
  "- **MMLU** — Multitask Language Understanding ([paper](https://arxiv.org/abs/2009.03300))\n"
527
  "- **NuminaMath** — Mathematical Reasoning ([paper](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf))\n"
528
  "- **RULER** — Extreme Long-Context Eval ([paper](https://arxiv.org/abs/2404.06654))\n\n"
529
-
530
  "### Metrics\n"
531
  "- **E2E(s)** — End-to-End Latency\n"
532
  "- **Accuracy(%)** — Task Accuracy\n"
@@ -538,7 +464,7 @@ def build_app() -> gr.Blocks:
538
  elem_classes="info-section"
539
  )
540
 
541
- # Right side - Table with selection and Radar Plot below
542
  with gr.Column(scale=5):
543
  leaderboard_output = gr.HTML(label="πŸ“ˆ Results")
544
 
@@ -558,7 +484,6 @@ def build_app() -> gr.Blocks:
558
  )
559
  generate_btn = gr.Button("🎯 Generate", variant="primary", scale=1, size="lg")
560
 
561
- # Modified Layout: Removed surrounding columns to allow plot to fill full width
562
  radar_plot = gr.Plot(
563
  label="",
564
  value=generate_radar_plot([]),
@@ -567,40 +492,16 @@ def build_app() -> gr.Blocks:
567
 
568
  df_data_state = gr.State([])
569
 
570
- demo.load(
571
- fn=auto_refresh_from_dir,
572
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
573
- outputs=[leaderboard_output, df_data_state],
574
- )
575
-
576
- search_input.change(
577
- fn=load_from_dir,
578
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
579
- outputs=[leaderboard_output, df_data_state],
580
- )
581
 
582
- task_filter.change(
583
- fn=load_from_dir,
584
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
585
- outputs=[leaderboard_output, df_data_state],
586
- )
587
- framework_filter.change(
588
- fn=load_from_dir,
589
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
590
- outputs=[leaderboard_output, df_data_state],
591
- )
592
- model_type_filter.change(
593
- fn=load_from_dir,
594
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
595
- outputs=[leaderboard_output, df_data_state],
596
- )
597
- precision_filter.change(
598
- fn=load_from_dir,
599
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
600
- outputs=[leaderboard_output, df_data_state],
601
- )
602
 
603
- # Generate plot on button click
604
  generate_btn.click(
605
  fn=parse_and_generate_plot,
606
  inputs=[df_data_state, row_indices_input],
@@ -608,11 +509,7 @@ def build_app() -> gr.Blocks:
608
  )
609
 
610
  timer = gr.Timer(60.0)
611
- timer.tick(
612
- fn=auto_refresh_from_dir,
613
- inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
614
- outputs=[leaderboard_output, df_data_state],
615
- )
616
 
617
  return demo
618
 
 
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
 
10
  # RESULT_DIR = "generic_result_dir"
11
  raise RuntimeError(
12
  "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
 
32
  return baseline + (val - vmin) / (vmax - vmin) * (100 - baseline)
33
 
34
 
 
 
 
 
 
 
 
35
  def normalize_cost(val, max_tick, baseline=20):
36
  """Normalize cost (lower is better)."""
37
  if max_tick == 0:
 
110
  # Extract metrics from selected rows
111
  data = {}
112
  for row in selected_rows_data:
 
113
  model_name = row.get('Model', 'Unknown')
114
  if isinstance(model_name, str) and 'href' in model_name:
115
  try:
 
117
  except:
118
  pass
119
 
 
120
  method = row.get('Method', '')
121
  if isinstance(model_name, str) and '/' in model_name:
122
+ legend_name = model_name.split('/')[-1]
123
  else:
124
  legend_name = str(model_name)
125
 
 
126
  if method and method not in ['Unknown', '-', '']:
127
  legend_name = f"{legend_name}-{method}"
128
 
 
129
  acc = row.get('Accuracy(%)', 0)
130
  cost = row.get('Cost($)', 0)
131
  throughput = row.get('Decoding T/s', 0)
132
 
 
133
  try:
134
  acc = float(acc) if acc not in [None, '-', ''] else 0
135
  cost = float(cost) if cost not in [None, '-', ''] else 0
 
138
  acc, cost, throughput = 0, 0, 0
139
 
140
  data[legend_name] = {
141
+ 'accuracy': acc / 100.0 if acc > 1 else acc,
142
  'cost': cost,
143
  'throughput': throughput
144
  }
145
 
146
+ # Get min/max
147
  throughputs = [v['throughput'] for v in data.values()]
148
  costs = [v['cost'] for v in data.values()]
149
  accs = [v['accuracy'] for v in data.values()]
 
164
  normalize_cost(values['cost'], cost_max, baseline),
165
  normalize(values['accuracy'], acc_min, acc_max, baseline)
166
  ]
167
+ norm_vals += [norm_vals[0]]
168
 
169
  hovertext = [
170
  f"Throughput: {raw_vals[0]:.2f} T/s",
 
198
  ),
199
  angularaxis=dict(
200
  tickfont=dict(size=14),
201
+ rotation=90,
202
  direction='clockwise'
203
  ),
204
  ),
 
222
  model_name = "unknown-model"
223
 
224
  dataset = metrics.get("dataset", "Unknown")
 
225
  method = metrics.get("method", "Unknown")
226
  precision = metrics.get("precision", "Unknown")
227
  model_type = metrics.get("model_type", "Unknown")
 
283
  try:
284
  pattern = f"hf://datasets/{dir_path}/**/*.json"
285
  dl_mode = "force_redownload" if force_refresh else None
286
+ ds = load_dataset("json", data_files={"train": pattern}, split="train", download_mode=dl_mode)
287
+ except Exception:
288
+ return "<p>No files loaded or Dataset not found.</p>", []
 
 
 
 
 
 
 
 
289
 
290
  rows = []
291
  for i, example in enumerate(ds):
292
+ metrics = example.get("metrics") or example.get("json") or example
 
 
 
293
  rows.append(json_to_row(f"{dir_path}#{i}", metrics))
294
 
295
  if not rows:
296
+ return "<p>No records found.</p>", []
 
297
 
298
  df = pd.DataFrame(rows)
299
 
300
+ if selected_tasks:
301
+ df = df[df["Dataset"].astype(str).str.lower().isin([x.lower() for x in selected_tasks])]
302
+ if selected_frameworks:
303
+ df = df[df["Method"].astype(str).str.lower().isin([str(x).lower() for x in selected_frameworks])]
304
+ if selected_model_types:
305
+ df = df[df["Model type"].astype(str).str.lower().isin([str(x).lower() for x in selected_model_types])]
306
+ if selected_precisions:
307
+ df = df[df["Precision"].astype(str).str.lower().isin([str(x).lower() for x in selected_precisions])]
 
 
 
 
 
 
 
 
 
308
  if search_keyword and search_keyword.strip():
309
+ mask = df.astype(str).apply(lambda row: row.str.lower().str.contains(search_keyword.strip().lower()).any(), axis=1)
 
310
  df = df[mask]
311
 
312
  if df.empty:
313
+ return "<p>No records found.</p>", []
 
314
 
315
  df = df.fillna("-")
 
 
316
  df.insert(0, 'Row #', range(len(df)))
317
 
 
318
  table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
319
  df_without_rownum = df.drop('Row #', axis=1)
320
+ return table_html, df_without_rownum.to_dict('records')
 
321
 
322
 
323
+ def auto_refresh_from_dir(dir_path, *args):
324
+ return load_from_dir(dir_path, *args, force_refresh=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
 
326
 
327
  def parse_and_generate_plot(df_data: list, indices_str: str):
 
328
  if not indices_str or not indices_str.strip():
329
  return generate_radar_plot([])
 
330
  try:
 
331
  indices = [int(idx.strip()) for idx in indices_str.split(',') if idx.strip()]
332
+ return generate_radar_plot([df_data[i] for i in indices[:3] if 0 <= i < len(df_data)])
333
+ except:
 
 
 
 
334
  return generate_radar_plot([])
335
 
336
 
 
 
337
  def build_app() -> gr.Blocks:
338
  row_css = """
339
  body { background-color: #f5f7fa !important; }
340
 
341
+ /* === 1. STICKY SIDEBAR FOR FILTERS === */
342
+ /* The column containing filters will be sticky */
343
+ .sticky-col {
344
+ position: -webkit-sticky !important;
345
+ position: sticky !important;
346
+ top: 20px !important;
347
+ height: fit-content !important;
348
+ max-height: 95vh !important;
349
+ overflow-y: auto !important; /* Only scroll if filters are taller than screen */
350
+ }
351
+
352
+ /* === 2. NO INTERNAL SCROLLBARS FOR ELEMENTS === */
353
+ /* Force Checkbox Groups to show all items (no scroll) */
354
+ .gradio-container .gr-checkbox-group,
355
+ .gradio-container .gr-radio,
356
+ .gradio-container .gr-checkbox-group label,
357
+ .gradio-container .gr-radio label {
358
+ max-height: none !important;
359
+ overflow: visible !important;
360
+ flex-wrap: wrap !important;
361
+ }
362
+
363
+ /* Remove scrolls from filter boxes and plot containers */
364
+ .filter-section, .search-box, .plot-container {
365
+ overflow: visible !important;
366
+ }
367
+
368
+ /* === 3. TABLE & GENERAL STYLING === */
369
  .metrics-table th:first-child, .metrics-table td:first-child {
370
  width: 60px !important; text-align: center !important;
371
  padding: 8px !important; font-weight: 600 !important;
 
376
  border-radius: 6px; border: 2px solid #e1e4e8 !important;
377
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06); margin-bottom: 16px;
378
  }
 
 
 
 
 
 
 
 
 
379
  .gradio-container { max-width: 100% !important; padding: 20px !important; background-color: #f5f7fa !important; }
380
+ .gradio-container .block, .gradio-container .form { background-color: white !important; border-color: #e1e4e8 !important; }
 
 
381
  .gradio-container label, .gradio-container p, .gradio-container span, .gradio-container div { color: #24292e !important; }
382
 
 
 
 
 
 
383
  .gradio-container table.metrics-table th {
384
  background: linear-gradient(to bottom, #fafbfc, #f6f8fa);
385
  font-weight: 600; position: sticky; top: 0; z-index: 10;
386
  border-bottom: 2px solid #d1d5da;
387
  }
388
+ .gradio-container table.metrics-table td { padding: 10px 14px; border: 1.5px solid #e1e4e8; white-space: nowrap; font-size: 13px; }
 
389
  .gradio-container table.metrics-table { border-collapse: collapse; width: 100%; background: white; }
 
 
 
 
 
390
 
391
+ /* Scrollable Table Container */
392
  .table-container {
393
+ overflow-x: auto; overflow-y: auto;
394
+ max-height: 60vh; /* Reduced slightly to help plot visibility */
395
  border: 2px solid #e1e4e8; border-radius: 6px;
396
  background: white; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
397
  }
398
+
399
  .filter-section {
400
  background: white !important; padding: 0 !important; border-radius: 6px;
401
  border: 2px solid #e1e4e8 !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
402
  }
 
403
  .filter-section .wrap { padding: 20px !important; }
 
 
 
 
404
  .info-section { padding: 16px; background: white !important; }
 
405
  .gradio-container h1 { color: #24292e !important; font-weight: 700; margin-bottom: 24px; }
406
  .gradio-container h3 { color: #24292e !important; font-weight: 600; margin-bottom: 16px; }
 
407
  """
408
 
409
  with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
410
  gr.Markdown("# MoE-CAP Dashboard")
411
 
412
  with gr.Row():
413
+ # Left side - Filters (Sticky)
414
+ # Added elem_classes="sticky-col" to make this column stay on screen
415
+ with gr.Column(scale=2, elem_classes="sticky-col"):
416
  with gr.Group(elem_classes="search-box"):
417
  search_input = gr.Textbox(
418
  label="πŸ” Search",
 
422
 
423
  with gr.Group(elem_classes="filter-section"):
424
  gr.Markdown("### πŸŽ›οΈ Filters")
 
425
  dir_path = gr.State(RESULT_DIR)
426
 
427
  task_filter = gr.CheckboxGroup(
428
  label="πŸ“Š Tasks",
429
+ choices=[("GSM8K", "gsm8k"), ("LongBench", "longbench"), ("MMLU", "mmlu"), ("NuminaMath", "numinamath"), ("RULER", "ruler")],
 
 
 
 
 
 
430
  value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
431
  )
 
432
  framework_filter = gr.CheckboxGroup(
433
  label="βš™οΈ Inference Frameworks",
434
  choices=["sglang", "vllm"],
435
  value=["sglang", "vllm"],
436
  )
 
437
  model_type_filter = gr.CheckboxGroup(
438
  label="πŸ€– Model Types",
439
  choices=["instruct", "thinking"],
440
  value=["instruct", "thinking"],
441
  )
 
442
  precision_filter = gr.CheckboxGroup(
443
  label="🎯 Precision",
444
  choices=["bfloat16", "fp8"],
 
453
  "- **MMLU** — Multitask Language Understanding ([paper](https://arxiv.org/abs/2009.03300))\n"
454
  "- **NuminaMath** — Mathematical Reasoning ([paper](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf))\n"
455
  "- **RULER** — Extreme Long-Context Eval ([paper](https://arxiv.org/abs/2404.06654))\n\n"
 
456
  "### Metrics\n"
457
  "- **E2E(s)** — End-to-End Latency\n"
458
  "- **Accuracy(%)** — Task Accuracy\n"
 
464
  elem_classes="info-section"
465
  )
466
 
467
+ # Right side - Table and Plot
468
  with gr.Column(scale=5):
469
  leaderboard_output = gr.HTML(label="πŸ“ˆ Results")
470
 
 
484
  )
485
  generate_btn = gr.Button("🎯 Generate", variant="primary", scale=1, size="lg")
486
 
 
487
  radar_plot = gr.Plot(
488
  label="",
489
  value=generate_radar_plot([]),
 
492
 
493
  df_data_state = gr.State([])
494
 
495
+ inputs = [dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input]
496
+ outputs = [leaderboard_output, df_data_state]
 
 
 
 
 
 
 
 
 
497
 
498
+ demo.load(fn=auto_refresh_from_dir, inputs=inputs, outputs=outputs)
499
+ search_input.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
500
+ task_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
501
+ framework_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
502
+ model_type_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
503
+ precision_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
 
505
  generate_btn.click(
506
  fn=parse_and_generate_plot,
507
  inputs=[df_data_state, row_indices_input],
 
509
  )
510
 
511
  timer = gr.Timer(60.0)
512
+ timer.tick(fn=auto_refresh_from_dir, inputs=inputs, outputs=outputs)
 
 
 
 
513
 
514
  return demo
515