agh123 committed on
Commit
369eb02
·
1 Parent(s): 8743cca

feat: add plot ranking ladder

Browse files
Files changed (1) hide show
  1. src/components/visualizations.py +182 -14
src/components/visualizations.py CHANGED
@@ -7,6 +7,8 @@ import plotly.express as px
7
  import pandas as pd
8
  from typing import Optional, Dict, List, Set
9
  from ..core.glicko2_ranking import analyze_glicko2_rankings
 
 
10
 
11
 
12
  def clean_device_id(device_id: str) -> str:
@@ -315,6 +317,180 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
315
  )
316
 
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  def render_device_rankings(df: pd.DataFrame):
319
  """Render device rankings using Glicko-2 algorithm."""
320
  if df.empty:
@@ -432,6 +608,12 @@ def render_device_rankings(df: pd.DataFrame):
432
  g2_confident_display.index = g2_confident_display.index + 1
433
  g2_confident_display = g2_confident_display.rename_axis("Rank")
434
 
 
 
 
 
 
 
435
  # Display the table
436
  st.dataframe(
437
  g2_confident_display[
@@ -450,20 +632,6 @@ def render_device_rankings(df: pd.DataFrame):
450
  hide_index=False,
451
  )
452
 
453
- # # Platform statistics
454
- # st.markdown("#### Platform Statistics")
455
- # platform_stats = (
456
- # g2_confident_display.groupby("Platform")
457
- # .agg(
458
- # {
459
- # "Rating": ["mean", "std"],
460
- # }
461
- # )
462
- # .round(0)
463
- # .astype(int)
464
- # )
465
- # st.dataframe(platform_stats, use_container_width=True)
466
-
467
  else:
468
  st.warning(
469
  "No confident rankings available. Try adjusting the minimum matches threshold."
 
7
  import pandas as pd
8
  from typing import Optional, Dict, List, Set
9
  from ..core.glicko2_ranking import analyze_glicko2_rankings
10
+ import plotly.graph_objects as go
11
+ import numpy as np
12
 
13
 
14
  def clean_device_id(device_id: str) -> str:
 
317
  )
318
 
319
 
320
def create_device_radar_chart(
    g2_confident_display: pd.DataFrame, top_n: int = 10
) -> "go.Figure":
    """Create a radar chart comparing the top N devices across rating metrics.

    The ``top_n`` rows with the largest "Rating" are selected, and the
    "Rating", "Token Rating" and "Prompt Rating" columns are min-max scaled
    to 0-100 *within that selection* so the three axes share one radial range.

    Args:
        g2_confident_display: Frame with at least the columns "Rating",
            "Token Rating", "Prompt Rating", "Device" and "Platform".
        top_n: Maximum number of devices to plot.

    Returns:
        A Plotly figure with one ``Scatterpolar`` trace per selected device.
    """
    # Work on an explicit copy so the helper columns added below never leak
    # into (or warn about) the caller's frame.
    top_devices = g2_confident_display.nlargest(top_n, "Rating").copy()

    # Normalize metrics to a 0-100 scale for better visualization.
    metrics = ["Rating", "Token Rating", "Prompt Rating"]
    for metric in metrics:
        min_val = top_devices[metric].min()
        max_val = top_devices[metric].max()
        span = max_val - min_val
        if span > 0:
            top_devices[f"{metric}_normalized"] = (
                (top_devices[metric] - min_val) / span
            ) * 100
        else:
            # A single device or identical values would give 0/0 -> NaN and
            # an invisible trace; treat every tied device as "best" instead.
            top_devices[f"{metric}_normalized"] = 100.0

    fig = go.Figure()
    palette = px.colors.qualitative.Set1

    # Colors are picked by position in the selection, not by index label:
    # labels may be non-integer, or 1-based ranks that skip the first color.
    for position, (_, row) in enumerate(top_devices.iterrows()):
        fig.add_trace(
            go.Scatterpolar(
                r=[
                    row["Rating_normalized"],
                    row["Token Rating_normalized"],
                    row["Prompt Rating_normalized"],
                    row["Rating_normalized"],  # Close the shape
                ],
                theta=["Overall", "Token Gen", "Prompt Proc", "Overall"],
                fill="toself",
                name=f"{row['Device']} ({row['Platform']})",
                line=dict(color=palette[position % len(palette)]),
                # Hover templates are evaluated per point: `r`/`theta` are
                # scalars there and the trace name is `fullData.name`.
                hovertemplate="<b>%{fullData.name}</b><br>"
                + "%{theta}: %{r:.1f}%<br>"
                + "<extra></extra>",
            )
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 100], tickfont=dict(size=10)),
            angularaxis=dict(tickfont=dict(size=12)),
        ),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        title=dict(
            text=f"Performance Comparison of Top {top_n} Devices",
            x=0.5,
            y=0.95,
            font=dict(size=16),
        ),
        margin=dict(t=100, l=50, r=50, b=50),
        height=600,
    )

    return fig
382
+
383
+
384
def create_ranking_ladder(
    g2_confident_display: pd.DataFrame, top_n: int = 20
) -> "go.Figure":
    """Create a ranking ladder showing device positions and rating deviation.

    The ``top_n`` rows with the largest "Rating" are placed on consecutive
    rows (rank 1 at the top). Each device gets a marker at its rating, colored
    by platform and labelled with the device name, plus a horizontal bar
    spanning ``Rating - Rating Deviation`` to ``Rating + Rating Deviation``.

    Args:
        g2_confident_display: Frame with at least the columns "Rating",
            "Rating Deviation", "Device" and "Platform".
        top_n: Maximum number of devices to show.

    Returns:
        A Plotly figure containing one bar trace and one marker trace per
        platform present in the selection.
    """
    top_devices = g2_confident_display.nlargest(top_n, "Rating").copy()

    # Create y-axis positions (rank 1 at top; the y-axis is reversed below).
    top_devices["rank_position"] = np.arange(1, len(top_devices) + 1)

    fig = go.Figure()

    # Draw all deviation bars as ONE trace: a ``None`` entry breaks the line,
    # so each device still gets its own segment without a trace per device.
    interval_x = []
    interval_y = []
    for rating, deviation, position in zip(
        top_devices["Rating"],
        top_devices["Rating Deviation"],
        top_devices["rank_position"],
    ):
        interval_x += [rating - deviation, rating + deviation, None]
        interval_y += [position, position, None]

    if interval_x:
        fig.add_trace(
            go.Scatter(
                x=interval_x,
                y=interval_y,
                mode="lines",
                line=dict(color="rgba(0,0,0,0.3)", width=8),
                showlegend=False,
                hoverinfo="skip",
            )
        )

    # Add rating points, one trace per platform so the legend groups by it.
    palette = px.colors.qualitative.Set1
    for platform_idx, platform in enumerate(top_devices["Platform"].unique()):
        platform_devices = top_devices[top_devices["Platform"] == platform]
        fig.add_trace(
            go.Scatter(
                x=platform_devices["Rating"],
                y=platform_devices["rank_position"],
                mode="markers+text",
                marker=dict(
                    size=12,
                    color=palette[platform_idx % len(palette)],
                ),
                text=platform_devices["Device"],
                textposition="middle right",
                textfont=dict(
                    color="rgba(0,0,0,1.0)",  # Full black for maximum contrast
                    size=12,
                    family="Arial Black, sans-serif",  # Bold font
                ),
                name=platform,
                hovertemplate="<b>%{text}</b><br>"
                + "Rank: #%{y:.0f}<br>"
                + "Rating: %{x:.0f}<br>"
                + "Deviation: ±%{customdata[0]:.0f}<br>"
                + "<extra></extra>",
                # 2-D array (n, 1) so the template can index customdata[0].
                customdata=platform_devices[["Rating Deviation"]].to_numpy(),
            )
        )

    axis_title_font = dict(
        size=14, family="Arial, sans-serif", color="rgba(0,0,0,1.0)"
    )

    fig.update_layout(
        title=dict(
            text=f"Device Ranking Ladder (Top {top_n})",
            x=0.5,
            y=0.95,
            font=dict(size=16, family="Arial, sans-serif", color="rgba(0,0,0,1.0)"),
        ),
        xaxis=dict(
            # `titlefont` is deprecated (and rejected by newer Plotly
            # releases); the font belongs under `title.font`.
            title=dict(text="Rating", font=axis_title_font),
            showgrid=True,
            gridwidth=1,
            gridcolor="rgba(0,0,0,0.1)",
            autorange="reversed",  # Reverse x-axis to show highest values on left
        ),
        yaxis=dict(
            title=dict(text="Rank", font=axis_title_font),
            showgrid=True,
            gridwidth=1,
            gridcolor="rgba(0,0,0,0.1)",
            tickmode="array",
            tickvals=top_devices["rank_position"].tolist(),
            ticktext=[f"#{i}" for i in range(1, len(top_devices) + 1)],
            autorange="reversed",  # This will put rank 1 at the top
        ),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            font=dict(size=12, family="Arial, sans-serif", color="rgba(0,0,0,1.0)"),
        ),
        # Wide right margin leaves room for the device-name labels.
        margin=dict(t=100, l=50, r=200, b=50),
        height=800,
        hovermode="closest",
        paper_bgcolor="rgba(255,255,255,1)",  # Pure white background
        plot_bgcolor="rgba(255,255,255,1)",  # Pure white plot area
    )

    return fig
492
+
493
+
494
  def render_device_rankings(df: pd.DataFrame):
495
  """Render device rankings using Glicko-2 algorithm."""
496
  if df.empty:
 
608
  g2_confident_display.index = g2_confident_display.index + 1
609
  g2_confident_display = g2_confident_display.rename_axis("Rank")
610
 
611
+ # Display the ranking ladder
612
+ st.plotly_chart(
613
+ create_ranking_ladder(g2_confident_display, top_n=20),
614
+ use_container_width=True,
615
+ )
616
+
617
  # Display the table
618
  st.dataframe(
619
  g2_confident_display[
 
632
  hide_index=False,
633
  )
634
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  else:
636
  st.warning(
637
  "No confident rankings available. Try adjusting the minimum matches threshold."