AppleSwing committed on
Commit
423d316
·
verified ·
1 Parent(s): 9a48e69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -73
app.py CHANGED
@@ -7,6 +7,7 @@ os.environ["GRADIO_LANGUAGE"] = "en"
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
 
10
  # RESULT_DIR = "generic_result_dir"
11
  raise RuntimeError(
12
  "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
@@ -32,6 +33,13 @@ def normalize(val, vmin, vmax, baseline=20):
32
  return baseline + (val - vmin) / (vmax - vmin) * (100 - baseline)
33
 
34
 
 
 
 
 
 
 
 
35
  def normalize_cost(val, max_tick, baseline=20):
36
  """Normalize cost (lower is better)."""
37
  if max_tick == 0:
@@ -110,6 +118,7 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
110
  # Extract metrics from selected rows
111
  data = {}
112
  for row in selected_rows_data:
 
113
  model_name = row.get('Model', 'Unknown')
114
  if isinstance(model_name, str) and 'href' in model_name:
115
  try:
@@ -117,19 +126,23 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
117
  except:
118
  pass
119
 
 
120
  method = row.get('Method', '')
121
  if isinstance(model_name, str) and '/' in model_name:
122
- legend_name = model_name.split('/')[-1]
123
  else:
124
  legend_name = str(model_name)
125
 
 
126
  if method and method not in ['Unknown', '-', '']:
127
  legend_name = f"{legend_name}-{method}"
128
 
 
129
  acc = row.get('Accuracy(%)', 0)
130
  cost = row.get('Cost($)', 0)
131
  throughput = row.get('Decoding T/s', 0)
132
 
 
133
  try:
134
  acc = float(acc) if acc not in [None, '-', ''] else 0
135
  cost = float(cost) if cost not in [None, '-', ''] else 0
@@ -138,12 +151,12 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
138
  acc, cost, throughput = 0, 0, 0
139
 
140
  data[legend_name] = {
141
- 'accuracy': acc / 100.0 if acc > 1 else acc,
142
  'cost': cost,
143
  'throughput': throughput
144
  }
145
 
146
- # Get min/max
147
  throughputs = [v['throughput'] for v in data.values()]
148
  costs = [v['cost'] for v in data.values()]
149
  accs = [v['accuracy'] for v in data.values()]
@@ -164,7 +177,7 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
164
  normalize_cost(values['cost'], cost_max, baseline),
165
  normalize(values['accuracy'], acc_min, acc_max, baseline)
166
  ]
167
- norm_vals += [norm_vals[0]]
168
 
169
  hovertext = [
170
  f"Throughput: {raw_vals[0]:.2f} T/s",
@@ -198,7 +211,7 @@ def generate_radar_plot(selected_rows_data: List[dict]) -> go.Figure:
198
  ),
199
  angularaxis=dict(
200
  tickfont=dict(size=14),
201
- rotation=90,
202
  direction='clockwise'
203
  ),
204
  ),
@@ -222,6 +235,7 @@ def json_to_row(path: str, metrics: dict) -> dict:
222
  model_name = "unknown-model"
223
 
224
  dataset = metrics.get("dataset", "Unknown")
 
225
  method = metrics.get("method", "Unknown")
226
  precision = metrics.get("precision", "Unknown")
227
  model_type = metrics.get("model_type", "Unknown")
@@ -283,89 +297,113 @@ def load_from_dir(
283
  try:
284
  pattern = f"hf://datasets/{dir_path}/**/*.json"
285
  dl_mode = "force_redownload" if force_refresh else None
286
- ds = load_dataset("json", data_files={"train": pattern}, split="train", download_mode=dl_mode)
287
- except Exception:
288
- return "<p>No files loaded or Dataset not found.</p>", []
 
 
 
 
 
 
 
 
289
 
290
  rows = []
291
  for i, example in enumerate(ds):
292
- metrics = example.get("metrics") or example.get("json") or example
 
 
 
293
  rows.append(json_to_row(f"{dir_path}#{i}", metrics))
294
 
295
  if not rows:
296
- return "<p>No records found.</p>", []
 
297
 
298
  df = pd.DataFrame(rows)
299
 
300
- if selected_tasks:
301
- df = df[df["Dataset"].astype(str).str.lower().isin([x.lower() for x in selected_tasks])]
302
- if selected_frameworks:
303
- df = df[df["Method"].astype(str).str.lower().isin([str(x).lower() for x in selected_frameworks])]
304
- if selected_model_types:
305
- df = df[df["Model type"].astype(str).str.lower().isin([str(x).lower() for x in selected_model_types])]
306
- if selected_precisions:
307
- df = df[df["Precision"].astype(str).str.lower().isin([str(x).lower() for x in selected_precisions])]
 
 
 
 
 
 
 
 
 
308
  if search_keyword and search_keyword.strip():
309
- mask = df.astype(str).apply(lambda row: row.str.lower().str.contains(search_keyword.strip().lower()).any(), axis=1)
 
310
  df = df[mask]
311
 
312
  if df.empty:
313
- return "<p>No records found.</p>", []
 
314
 
315
  df = df.fillna("-")
 
 
316
  df.insert(0, 'Row #', range(len(df)))
317
 
 
318
  table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
319
  df_without_rownum = df.drop('Row #', axis=1)
320
- return table_html, df_without_rownum.to_dict('records')
 
321
 
322
 
323
- def auto_refresh_from_dir(dir_path, *args):
324
- return load_from_dir(dir_path, *args, force_refresh=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
 
326
 
327
  def parse_and_generate_plot(df_data: list, indices_str: str):
 
328
  if not indices_str or not indices_str.strip():
329
  return generate_radar_plot([])
 
330
  try:
 
331
  indices = [int(idx.strip()) for idx in indices_str.split(',') if idx.strip()]
332
- return generate_radar_plot([df_data[i] for i in indices[:3] if 0 <= i < len(df_data)])
333
- except:
 
 
 
 
334
  return generate_radar_plot([])
335
 
336
 
 
 
337
  def build_app() -> gr.Blocks:
338
  row_css = """
339
  body { background-color: #f5f7fa !important; }
340
 
341
- /* === 1. STICKY SIDEBAR FOR FILTERS === */
342
- /* The column containing filters will be sticky */
343
- .sticky-col {
344
- position: -webkit-sticky !important;
345
- position: sticky !important;
346
- top: 20px !important;
347
- height: fit-content !important;
348
- max-height: 95vh !important;
349
- overflow-y: auto !important; /* Only scroll if filters are taller than screen */
350
- }
351
-
352
- /* === 2. NO INTERNAL SCROLLBARS FOR ELEMENTS === */
353
- /* Force Checkbox Groups to show all items (no scroll) */
354
- .gradio-container .gr-checkbox-group,
355
- .gradio-container .gr-radio,
356
- .gradio-container .gr-checkbox-group label,
357
- .gradio-container .gr-radio label {
358
- max-height: none !important;
359
- overflow: visible !important;
360
- flex-wrap: wrap !important;
361
- }
362
-
363
- /* Remove scrolls from filter boxes and plot containers */
364
- .filter-section, .search-box, .plot-container {
365
- overflow: visible !important;
366
- }
367
-
368
- /* === 3. TABLE & GENERAL STYLING === */
369
  .metrics-table th:first-child, .metrics-table td:first-child {
370
  width: 60px !important; text-align: center !important;
371
  padding: 8px !important; font-weight: 600 !important;
@@ -376,43 +414,68 @@ def build_app() -> gr.Blocks:
376
  border-radius: 6px; border: 2px solid #e1e4e8 !important;
377
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06); margin-bottom: 16px;
378
  }
 
 
 
 
 
 
 
 
 
379
  .gradio-container { max-width: 100% !important; padding: 20px !important; background-color: #f5f7fa !important; }
380
- .gradio-container .block, .gradio-container .form { background-color: white !important; border-color: #e1e4e8 !important; }
 
 
381
  .gradio-container label, .gradio-container p, .gradio-container span, .gradio-container div { color: #24292e !important; }
382
 
 
 
 
 
 
383
  .gradio-container table.metrics-table th {
384
  background: linear-gradient(to bottom, #fafbfc, #f6f8fa);
385
  font-weight: 600; position: sticky; top: 0; z-index: 10;
386
  border-bottom: 2px solid #d1d5da;
387
  }
388
- .gradio-container table.metrics-table td { padding: 10px 14px; border: 1.5px solid #e1e4e8; white-space: nowrap; font-size: 13px; }
 
389
  .gradio-container table.metrics-table { border-collapse: collapse; width: 100%; background: white; }
 
 
 
 
 
390
 
391
- /* Scrollable Table Container */
392
  .table-container {
393
- overflow-x: auto; overflow-y: auto;
394
- max-height: 60vh; /* Reduced slightly to help plot visibility */
395
  border: 2px solid #e1e4e8; border-radius: 6px;
396
  background: white; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
397
  }
398
-
399
  .filter-section {
400
  background: white !important; padding: 0 !important; border-radius: 6px;
401
  border: 2px solid #e1e4e8 !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
402
  }
 
403
  .filter-section .wrap { padding: 20px !important; }
 
 
 
 
404
  .info-section { padding: 16px; background: white !important; }
 
405
  .gradio-container h1 { color: #24292e !important; font-weight: 700; margin-bottom: 24px; }
406
  .gradio-container h3 { color: #24292e !important; font-weight: 600; margin-bottom: 16px; }
 
407
  """
408
 
409
  with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
410
  gr.Markdown("# MoE-CAP Dashboard")
411
 
412
  with gr.Row():
413
- # Left side - Filters (Sticky)
414
- # Added elem_classes="sticky-col" to make this column stay on screen
415
- with gr.Column(scale=2, elem_classes="sticky-col"):
416
  with gr.Group(elem_classes="search-box"):
417
  search_input = gr.Textbox(
418
  label="πŸ” Search",
@@ -422,23 +485,33 @@ def build_app() -> gr.Blocks:
422
 
423
  with gr.Group(elem_classes="filter-section"):
424
  gr.Markdown("### πŸŽ›οΈ Filters")
 
425
  dir_path = gr.State(RESULT_DIR)
426
 
427
  task_filter = gr.CheckboxGroup(
428
  label="πŸ“Š Tasks",
429
- choices=[("GSM8K", "gsm8k"), ("LongBench", "longbench"), ("MMLU", "mmlu"), ("NuminaMath", "numinamath"), ("RULER", "ruler")],
 
 
 
 
 
 
430
  value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
431
  )
 
432
  framework_filter = gr.CheckboxGroup(
433
  label="βš™οΈ Inference Frameworks",
434
  choices=["sglang", "vllm"],
435
  value=["sglang", "vllm"],
436
  )
 
437
  model_type_filter = gr.CheckboxGroup(
438
  label="πŸ€– Model Types",
439
  choices=["instruct", "thinking"],
440
  value=["instruct", "thinking"],
441
  )
 
442
  precision_filter = gr.CheckboxGroup(
443
  label="🎯 Precision",
444
  choices=["bfloat16", "fp8"],
@@ -453,6 +526,7 @@ def build_app() -> gr.Blocks:
453
  "- **MMLU** β€” Multitask Language Understanding ([paper](https://arxiv.org/abs/2009.03300))\n"
454
  "- **NuminaMath** β€” Mathematical Reasoning ([paper](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf))\n"
455
  "- **RULER** β€” Extreme Long-Context Eval ([paper](https://arxiv.org/abs/2404.06654))\n\n"
 
456
  "### Metrics\n"
457
  "- **E2E(s)** β€” End-to-End Latency\n"
458
  "- **Accuracy(%)** β€” Task Accuracy\n"
@@ -464,7 +538,7 @@ def build_app() -> gr.Blocks:
464
  elem_classes="info-section"
465
  )
466
 
467
- # Right side - Table and Plot
468
  with gr.Column(scale=5):
469
  leaderboard_output = gr.HTML(label="πŸ“ˆ Results")
470
 
@@ -484,6 +558,7 @@ def build_app() -> gr.Blocks:
484
  )
485
  generate_btn = gr.Button("🎯 Generate", variant="primary", scale=1, size="lg")
486
 
 
487
  radar_plot = gr.Plot(
488
  label="",
489
  value=generate_radar_plot([]),
@@ -492,16 +567,40 @@ def build_app() -> gr.Blocks:
492
 
493
  df_data_state = gr.State([])
494
 
495
- inputs = [dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input]
496
- outputs = [leaderboard_output, df_data_state]
 
 
 
 
 
 
 
 
 
497
 
498
- demo.load(fn=auto_refresh_from_dir, inputs=inputs, outputs=outputs)
499
- search_input.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
500
- task_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
501
- framework_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
502
- model_type_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
503
- precision_filter.change(fn=load_from_dir, inputs=inputs, outputs=outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
 
505
  generate_btn.click(
506
  fn=parse_and_generate_plot,
507
  inputs=[df_data_state, row_indices_input],
@@ -509,7 +608,11 @@ def build_app() -> gr.Blocks:
509
  )
510
 
511
  timer = gr.Timer(60.0)
512
- timer.tick(fn=auto_refresh_from_dir, inputs=inputs, outputs=outputs)
 
 
 
 
513
 
514
  return demo
515
 
 
7
 
8
  RESULT_DIR = os.environ.get("MOECAP_RESULT_DIR")
9
  if not RESULT_DIR:
10
+ # For testing purposes, you can uncomment the line below to set a dummy dir or keep the raise
11
  # RESULT_DIR = "generic_result_dir"
12
  raise RuntimeError(
13
  "MOECAP_RESULT_DIR is not set. Please set MOECAP_RESULT_DIR (HF Repo ID) before running app.py"
 
33
  return baseline + (val - vmin) / (vmax - vmin) * (100 - baseline)
34
 
35
 
36
+ def normalize_reversed(val, vmin, vmax, baseline=20):
37
+ """Normalize value (reversed - lower is better) to baseline-100 range."""
38
+ if vmax == vmin:
39
+ return baseline + 40
40
+ return baseline + (vmax - val) / (vmax - vmin) * (100 - baseline)
41
+
42
+
43
  def normalize_cost(val, max_tick, baseline=20):
44
  """Normalize cost (lower is better)."""
45
  if max_tick == 0:
 
118
  # Extract metrics from selected rows
119
  data = {}
120
  for row in selected_rows_data:
121
+ # Extract model name from HTML or use as-is
122
  model_name = row.get('Model', 'Unknown')
123
  if isinstance(model_name, str) and 'href' in model_name:
124
  try:
 
126
  except:
127
  pass
128
 
129
+ # Format legend name: extract name after "/" and add method
130
  method = row.get('Method', '')
131
  if isinstance(model_name, str) and '/' in model_name:
132
+ legend_name = model_name.split('/')[-1] # Get part after last /
133
  else:
134
  legend_name = str(model_name)
135
 
136
+ # Add method suffix
137
  if method and method not in ['Unknown', '-', '']:
138
  legend_name = f"{legend_name}-{method}"
139
 
140
+ # Get metrics
141
  acc = row.get('Accuracy(%)', 0)
142
  cost = row.get('Cost($)', 0)
143
  throughput = row.get('Decoding T/s', 0)
144
 
145
+ # Convert to float if needed
146
  try:
147
  acc = float(acc) if acc not in [None, '-', ''] else 0
148
  cost = float(cost) if cost not in [None, '-', ''] else 0
 
151
  acc, cost, throughput = 0, 0, 0
152
 
153
  data[legend_name] = {
154
+ 'accuracy': acc / 100.0 if acc > 1 else acc, # Normalize to 0-1
155
  'cost': cost,
156
  'throughput': throughput
157
  }
158
 
159
+ # Get min/max for normalization
160
  throughputs = [v['throughput'] for v in data.values()]
161
  costs = [v['cost'] for v in data.values()]
162
  accs = [v['accuracy'] for v in data.values()]
 
177
  normalize_cost(values['cost'], cost_max, baseline),
178
  normalize(values['accuracy'], acc_min, acc_max, baseline)
179
  ]
180
+ norm_vals += [norm_vals[0]] # Close the loop
181
 
182
  hovertext = [
183
  f"Throughput: {raw_vals[0]:.2f} T/s",
 
211
  ),
212
  angularaxis=dict(
213
  tickfont=dict(size=14),
214
+ rotation=90, # Rotate so top is 12 o'clock
215
  direction='clockwise'
216
  ),
217
  ),
 
235
  model_name = "unknown-model"
236
 
237
  dataset = metrics.get("dataset", "Unknown")
238
+
239
  method = metrics.get("method", "Unknown")
240
  precision = metrics.get("precision", "Unknown")
241
  model_type = metrics.get("model_type", "Unknown")
 
297
  try:
298
  pattern = f"hf://datasets/{dir_path}/**/*.json"
299
  dl_mode = "force_redownload" if force_refresh else None
300
+
301
+ print(f"Fetching from {pattern} (mode={dl_mode})...")
302
+ ds = load_dataset(
303
+ "json",
304
+ data_files={"train": pattern},
305
+ split="train",
306
+ download_mode=dl_mode,
307
+ )
308
+ except Exception as e:
309
+ empty_html = "<p>No files loaded or Dataset not found.</p>"
310
+ return empty_html, []
311
 
312
  rows = []
313
  for i, example in enumerate(ds):
314
+ if isinstance(example, dict):
315
+ metrics = example.get("metrics") or example.get("json") or example
316
+ else:
317
+ metrics = example
318
  rows.append(json_to_row(f"{dir_path}#{i}", metrics))
319
 
320
  if not rows:
321
+ empty_html = "<p>No records found.</p>"
322
+ return empty_html, []
323
 
324
  df = pd.DataFrame(rows)
325
 
326
+ # Filters
327
+ if selected_tasks is not None:
328
+ lower_selected = [x.lower() for x in selected_tasks]
329
+ df = df[df["Dataset"].astype(str).str.lower().isin(lower_selected)]
330
+
331
+ if selected_frameworks is not None:
332
+ lower_selected = [str(x).lower() for x in selected_frameworks]
333
+ df = df[df["Method"].astype(str).str.lower().isin(lower_selected)]
334
+
335
+ if selected_model_types is not None:
336
+ lower_selected = [str(x).lower() for x in selected_model_types]
337
+ df = df[df["Model type"].astype(str).str.lower().isin(lower_selected)]
338
+
339
+ if selected_precisions is not None:
340
+ lower_selected = [str(x).lower() for x in selected_precisions]
341
+ df = df[df["Precision"].astype(str).str.lower().isin(lower_selected)]
342
+
343
  if search_keyword and search_keyword.strip():
344
+ keyword_lower = search_keyword.strip().lower()
345
+ mask = df.astype(str).apply(lambda row: row.str.lower().str.contains(keyword_lower).any(), axis=1)
346
  df = df[mask]
347
 
348
  if df.empty:
349
+ empty_html = "<p>No records found.</p>"
350
+ return empty_html, []
351
 
352
  df = df.fillna("-")
353
+
354
+ # Insert row number column at the beginning
355
  df.insert(0, 'Row #', range(len(df)))
356
 
357
+ # Create HTML table
358
  table_html = f'<div class="table-container">{df.to_html(escape=False, index=False, classes="metrics-table")}</div>'
359
  df_without_rownum = df.drop('Row #', axis=1)
360
+ df_dict = df_without_rownum.to_dict('records')
361
+ return table_html, df_dict
362
 
363
 
364
+ def auto_refresh_from_dir(
365
+ dir_path: str,
366
+ selected_tasks: List[str] | None = None,
367
+ selected_frameworks: List[str] | None = None,
368
+ selected_model_types: List[str] | None = None,
369
+ selected_precisions: List[str] | None = None,
370
+ search_keyword: str = "",
371
+ ):
372
+ return load_from_dir(
373
+ dir_path,
374
+ selected_tasks=selected_tasks,
375
+ selected_frameworks=selected_frameworks,
376
+ selected_model_types=selected_model_types,
377
+ selected_precisions=selected_precisions,
378
+ search_keyword=search_keyword,
379
+ force_refresh=True,
380
+ )
381
 
382
 
383
  def parse_and_generate_plot(df_data: list, indices_str: str):
384
+ """Parse comma-separated indices and generate radar plot."""
385
  if not indices_str or not indices_str.strip():
386
  return generate_radar_plot([])
387
+
388
  try:
389
+ # Parse comma-separated indices
390
  indices = [int(idx.strip()) for idx in indices_str.split(',') if idx.strip()]
391
+ # Limit to 3 rows
392
+ indices = indices[:3]
393
+ # Get selected rows
394
+ selected_rows = [df_data[i] for i in indices if 0 <= i < len(df_data)]
395
+ return generate_radar_plot(selected_rows)
396
+ except (ValueError, IndexError):
397
  return generate_radar_plot([])
398
 
399
 
400
+ # Gradio UI
401
+
402
  def build_app() -> gr.Blocks:
403
  row_css = """
404
  body { background-color: #f5f7fa !important; }
405
 
406
+ /* Row number column styling */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  .metrics-table th:first-child, .metrics-table td:first-child {
408
  width: 60px !important; text-align: center !important;
409
  padding: 8px !important; font-weight: 600 !important;
 
414
  border-radius: 6px; border: 2px solid #e1e4e8 !important;
415
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06); margin-bottom: 16px;
416
  }
417
+ .search-box .block { background: transparent !important; border: none !important; padding: 0 !important; }
418
+ .search-box label span { color: #24292e !important; font-weight: 600; font-size: 14px; margin-bottom: 8px; }
419
+ .search-box input.scroll-hide {
420
+ background-color: white !important; color: #24292e !important;
421
+ border: 1.5px solid #e1e4e8 !important; border-radius: 4px !important;
422
+ padding: 10px !important; box-shadow: none !important;
423
+ }
424
+ .search-box input.scroll-hide:focus { border-color: #0366d6 !important; outline: none !important; }
425
+
426
  .gradio-container { max-width: 100% !important; padding: 20px !important; background-color: #f5f7fa !important; }
427
+ .gradio-container .block, .gradio-container .form, .gradio-container .gr-box, .gradio-container .gr-input {
428
+ background-color: white !important; border-color: #e1e4e8 !important;
429
+ }
430
  .gradio-container label, .gradio-container p, .gradio-container span, .gradio-container div { color: #24292e !important; }
431
 
432
+ /* Table styling */
433
+ .gradio-container table.metrics-table th, .gradio-container table.metrics-table td {
434
+ padding: 10px 14px; border: 1.5px solid #e1e4e8; white-space: nowrap;
435
+ font-size: 13px; text-align: left; color: #24292e !important;
436
+ }
437
  .gradio-container table.metrics-table th {
438
  background: linear-gradient(to bottom, #fafbfc, #f6f8fa);
439
  font-weight: 600; position: sticky; top: 0; z-index: 10;
440
  border-bottom: 2px solid #d1d5da;
441
  }
442
+ .gradio-container table.metrics-table tbody tr:nth-child(even) { background-color: #f6f8fa; }
443
+ .gradio-container table.metrics-table tbody tr:hover { background-color: #e1e4e8; }
444
  .gradio-container table.metrics-table { border-collapse: collapse; width: 100%; background: white; }
445
+ .gradio-container table.metrics-table a { color: #0366d6 !important; text-decoration: none; }
446
+ .gradio-container table.metrics-table a:hover { text-decoration: underline; }
447
+
448
+ /* Allow plot container to expand */
449
+ .gradio-container .plot-container { width: 100% !important; }
450
 
 
451
  .table-container {
452
+ overflow-x: auto; overflow-y: auto; max-height: 75vh;
 
453
  border: 2px solid #e1e4e8; border-radius: 6px;
454
  background: white; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
455
  }
 
456
  .filter-section {
457
  background: white !important; padding: 0 !important; border-radius: 6px;
458
  border: 2px solid #e1e4e8 !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
459
  }
460
+ .filter-section .wrap, .filter-section .block, .filter-section .container, .filter-section .group { background: transparent !important; }
461
  .filter-section .wrap { padding: 20px !important; }
462
+ .gradio-container .accordion {
463
+ background: white !important; border: 2px solid #e1e4e8 !important;
464
+ border-radius: 6px !important; box-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
465
+ }
466
  .info-section { padding: 16px; background: white !important; }
467
+ .info-section a { color: #0366d6 !important; }
468
  .gradio-container h1 { color: #24292e !important; font-weight: 700; margin-bottom: 24px; }
469
  .gradio-container h3 { color: #24292e !important; font-weight: 600; margin-bottom: 16px; }
470
+ .gradio-container input[type="checkbox"] { accent-color: #0366d6 !important; }
471
  """
472
 
473
  with gr.Blocks(title="MoE-CAP Dashboard", css=row_css, theme=gr.themes.Default()) as demo:
474
  gr.Markdown("# MoE-CAP Dashboard")
475
 
476
  with gr.Row():
477
+ # Left side - Filters (narrower)
478
+ with gr.Column(scale=2):
 
479
  with gr.Group(elem_classes="search-box"):
480
  search_input = gr.Textbox(
481
  label="πŸ” Search",
 
485
 
486
  with gr.Group(elem_classes="filter-section"):
487
  gr.Markdown("### πŸŽ›οΈ Filters")
488
+
489
  dir_path = gr.State(RESULT_DIR)
490
 
491
  task_filter = gr.CheckboxGroup(
492
  label="πŸ“Š Tasks",
493
+ choices=[
494
+ ("GSM8K", "gsm8k"),
495
+ ("LongBench", "longbench"),
496
+ ("MMLU", "mmlu"),
497
+ ("NuminaMath", "numinamath"),
498
+ ("RULER", "ruler")
499
+ ],
500
  value=["gsm8k", "longbench", "mmlu", "numinamath", "ruler"]
501
  )
502
+
503
  framework_filter = gr.CheckboxGroup(
504
  label="βš™οΈ Inference Frameworks",
505
  choices=["sglang", "vllm"],
506
  value=["sglang", "vllm"],
507
  )
508
+
509
  model_type_filter = gr.CheckboxGroup(
510
  label="πŸ€– Model Types",
511
  choices=["instruct", "thinking"],
512
  value=["instruct", "thinking"],
513
  )
514
+
515
  precision_filter = gr.CheckboxGroup(
516
  label="🎯 Precision",
517
  choices=["bfloat16", "fp8"],
 
526
  "- **MMLU** β€” Multitask Language Understanding ([paper](https://arxiv.org/abs/2009.03300))\n"
527
  "- **NuminaMath** β€” Mathematical Reasoning ([paper](http://faculty.bicmr.pku.edu.cn/~dongbin/Publications/numina_dataset.pdf))\n"
528
  "- **RULER** β€” Extreme Long-Context Eval ([paper](https://arxiv.org/abs/2404.06654))\n\n"
529
+
530
  "### Metrics\n"
531
  "- **E2E(s)** β€” End-to-End Latency\n"
532
  "- **Accuracy(%)** β€” Task Accuracy\n"
 
538
  elem_classes="info-section"
539
  )
540
 
541
+ # Right side - Table with selection and Radar Plot below
542
  with gr.Column(scale=5):
543
  leaderboard_output = gr.HTML(label="πŸ“ˆ Results")
544
 
 
558
  )
559
  generate_btn = gr.Button("🎯 Generate", variant="primary", scale=1, size="lg")
560
 
561
+ # Modified Layout: Removed surrounding columns to allow plot to fill full width
562
  radar_plot = gr.Plot(
563
  label="",
564
  value=generate_radar_plot([]),
 
567
 
568
  df_data_state = gr.State([])
569
 
570
+ demo.load(
571
+ fn=auto_refresh_from_dir,
572
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
573
+ outputs=[leaderboard_output, df_data_state],
574
+ )
575
+
576
+ search_input.change(
577
+ fn=load_from_dir,
578
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
579
+ outputs=[leaderboard_output, df_data_state],
580
+ )
581
 
582
+ task_filter.change(
583
+ fn=load_from_dir,
584
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
585
+ outputs=[leaderboard_output, df_data_state],
586
+ )
587
+ framework_filter.change(
588
+ fn=load_from_dir,
589
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
590
+ outputs=[leaderboard_output, df_data_state],
591
+ )
592
+ model_type_filter.change(
593
+ fn=load_from_dir,
594
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
595
+ outputs=[leaderboard_output, df_data_state],
596
+ )
597
+ precision_filter.change(
598
+ fn=load_from_dir,
599
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
600
+ outputs=[leaderboard_output, df_data_state],
601
+ )
602
 
603
+ # Generate plot on button click
604
  generate_btn.click(
605
  fn=parse_and_generate_plot,
606
  inputs=[df_data_state, row_indices_input],
 
608
  )
609
 
610
  timer = gr.Timer(60.0)
611
+ timer.tick(
612
+ fn=auto_refresh_from_dir,
613
+ inputs=[dir_path, task_filter, framework_filter, model_type_filter, precision_filter, search_input],
614
+ outputs=[leaderboard_output, df_data_state],
615
+ )
616
 
617
  return demo
618