feat: add plot ranking ladder
Browse files- src/components/visualizations.py +182 -14
src/components/visualizations.py
CHANGED
@@ -7,6 +7,8 @@ import plotly.express as px
|
|
7 |
import pandas as pd
|
8 |
from typing import Optional, Dict, List, Set
|
9 |
from ..core.glicko2_ranking import analyze_glicko2_rankings
|
|
|
|
|
10 |
|
11 |
|
12 |
def clean_device_id(device_id: str) -> str:
|
@@ -315,6 +317,180 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
|
|
315 |
)
|
316 |
|
317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
def render_device_rankings(df: pd.DataFrame):
|
319 |
"""Render device rankings using Glicko-2 algorithm."""
|
320 |
if df.empty:
|
@@ -432,6 +608,12 @@ def render_device_rankings(df: pd.DataFrame):
|
|
432 |
g2_confident_display.index = g2_confident_display.index + 1
|
433 |
g2_confident_display = g2_confident_display.rename_axis("Rank")
|
434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
435 |
# Display the table
|
436 |
st.dataframe(
|
437 |
g2_confident_display[
|
@@ -450,20 +632,6 @@ def render_device_rankings(df: pd.DataFrame):
|
|
450 |
hide_index=False,
|
451 |
)
|
452 |
|
453 |
-
# # Platform statistics
|
454 |
-
# st.markdown("#### Platform Statistics")
|
455 |
-
# platform_stats = (
|
456 |
-
# g2_confident_display.groupby("Platform")
|
457 |
-
# .agg(
|
458 |
-
# {
|
459 |
-
# "Rating": ["mean", "std"],
|
460 |
-
# }
|
461 |
-
# )
|
462 |
-
# .round(0)
|
463 |
-
# .astype(int)
|
464 |
-
# )
|
465 |
-
# st.dataframe(platform_stats, use_container_width=True)
|
466 |
-
|
467 |
else:
|
468 |
st.warning(
|
469 |
"No confident rankings available. Try adjusting the minimum matches threshold."
|
|
|
7 |
import pandas as pd
|
8 |
from typing import Optional, Dict, List, Set
|
9 |
from ..core.glicko2_ranking import analyze_glicko2_rankings
|
10 |
+
import plotly.graph_objects as go
|
11 |
+
import numpy as np
|
12 |
|
13 |
|
14 |
def clean_device_id(device_id: str) -> str:
|
|
|
317 |
)
|
318 |
|
319 |
|
320 |
+
def create_device_radar_chart(g2_confident_display: pd.DataFrame, top_n: int = 10) -> "go.Figure":
    """Create a radar chart comparing the top N devices across rating metrics.

    The ``top_n`` rows with the highest ``"Rating"`` are selected and each of
    the ``"Rating"``, ``"Token Rating"`` and ``"Prompt Rating"`` columns is
    min-max scaled to 0-100 *within that selection*, so the chart shows
    relative standing among the displayed devices, not absolute ratings.

    Args:
        g2_confident_display: Frame with the columns ``"Rating"``,
            ``"Token Rating"``, ``"Prompt Rating"``, ``"Device"`` and
            ``"Platform"``. It is not modified.
        top_n: Maximum number of devices to plot.

    Returns:
        A Plotly figure with one ``Scatterpolar`` trace per selected device.
    """
    metrics = ["Rating", "Token Rating", "Prompt Rating"]

    # Work on a copy: helper columns are added below and must neither leak
    # into the caller's frame nor trigger chained-assignment warnings.
    top_devices = g2_confident_display.nlargest(top_n, "Rating").copy()

    # Normalize metrics to a 0-100 scale for better visualization.
    for metric in metrics:
        values = top_devices[metric]
        low = values.min()
        span = values.max() - low
        if span > 0:
            scaled = (values - low) / span * 100
        else:
            # A single device (or all devices tied) has no spread; dividing by
            # zero would yield NaN and an invisible trace. Treat every present
            # value as the best score instead, keeping missing values missing.
            scaled = pd.Series(100.0, index=values.index).where(values.notna())
        top_devices[f"{metric}_normalized"] = scaled

    palette = px.colors.qualitative.Set1
    fig = go.Figure()

    # Add a trace for each device. Colours are picked by position in the
    # selection, so the function does not depend on the index being integer.
    for position, (_, row) in enumerate(top_devices.iterrows()):
        scores = [float(row[f"{metric}_normalized"]) for metric in metrics]
        fig.add_trace(
            go.Scatterpolar(
                r=scores + scores[:1],  # Repeat the first point to close the shape
                theta=["Overall", "Token Gen", "Prompt Proc", "Overall"],
                fill="toself",
                name=f"{row['Device']} ({row['Platform']})",
                line=dict(color=palette[position % len(palette)]),
                # ``r`` is a scalar per hovered point, so all three scores are
                # carried in customdata to show the full profile on any vertex.
                customdata=[scores] * 4,
                hovertemplate="<b>%{fullData.name}</b><br>"
                + "Overall: %{customdata[0]:.1f}%<br>"
                + "Token Gen: %{customdata[1]:.1f}%<br>"
                + "Prompt Proc: %{customdata[2]:.1f}%<br>"
                + "<extra></extra>",
            )
        )

    # Update layout
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 100], tickfont=dict(size=10)),
            angularaxis=dict(tickfont=dict(size=12)),
        ),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        title=dict(
            text=f"Performance Comparison of Top {top_n} Devices",
            x=0.5,
            y=0.95,
            font=dict(size=16),
        ),
        margin=dict(t=100, l=50, r=50, b=50),
        height=600,
    )

    return fig
|
382 |
+
|
383 |
+
|
384 |
+
def create_ranking_ladder(g2_confident_display: pd.DataFrame, top_n: int = 20) -> "go.Figure":
    """Create a ranking ladder showing device positions and rating deviation.

    The ``top_n`` rows with the highest ``"Rating"`` are placed on consecutive
    rungs (rank 1 at the top). Each rung shows a horizontal bar spanning
    ``Rating - Rating Deviation`` to ``Rating + Rating Deviation`` and a
    labelled marker at the rating itself, coloured by platform.

    Args:
        g2_confident_display: Frame with the columns ``"Rating"``,
            ``"Rating Deviation"``, ``"Platform"`` and ``"Device"``. It is not
            modified.
        top_n: Maximum number of devices to show.

    Returns:
        A Plotly figure with one bar trace for the deviation ranges and one
        marker trace per platform.
    """
    # Select top N devices; nlargest returns them ordered by descending rating.
    top_devices = g2_confident_display.nlargest(top_n, "Rating").copy()

    # Create y-axis positions (rank 1 at top once the axis is reversed).
    top_devices["rank_position"] = np.arange(1, len(top_devices) + 1)
    rank_positions = [int(pos) for pos in top_devices["rank_position"]]

    fig = go.Figure()

    # Deviation bars: a single trace whose segments are separated by None gaps,
    # instead of one trace per device, keeps the figure small.
    lower_bounds = top_devices["Rating"] - top_devices["Rating Deviation"]
    upper_bounds = top_devices["Rating"] + top_devices["Rating Deviation"]
    bar_x = []
    bar_y = []
    for low, high, pos in zip(lower_bounds, upper_bounds, rank_positions):
        bar_x.extend([float(low), float(high), None])
        bar_y.extend([pos, pos, None])
    fig.add_trace(
        go.Scatter(
            x=bar_x,
            y=bar_y,
            mode="lines",
            line=dict(color="rgba(0,0,0,0.3)", width=8),
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Rating points, one trace per platform so each gets a legend entry.
    palette = px.colors.qualitative.Set1
    for position, platform in enumerate(top_devices["Platform"].unique()):
        platform_devices = top_devices[top_devices["Platform"] == platform]
        fig.add_trace(
            go.Scatter(
                x=platform_devices["Rating"],
                y=platform_devices["rank_position"],
                mode="markers+text",
                marker=dict(size=12, color=palette[position % len(palette)]),
                text=platform_devices["Device"],
                textposition="middle right",
                textfont=dict(
                    color="rgba(0,0,0,1.0)",  # Full black for maximum contrast
                    size=12,
                    family="Arial Black, sans-serif",  # Bold font
                ),
                name=platform,
                hovertemplate="<b>%{text}</b><br>"
                + "Rank: #%{y:.0f}<br>"
                + "Rating: %{x:.0f}<br>"
                + "Deviation: ±%{customdata[0]:.0f}<br>"
                + "<extra></extra>",
                customdata=platform_devices[["Rating Deviation"]].values,
            )
        )

    # Axis title fonts are set through ``title.font``; the legacy ``titlefont``
    # shortcut is deprecated and rejected by newer Plotly releases.
    axis_title_font = dict(size=14, family="Arial, sans-serif", color="rgba(0,0,0,1.0)")

    # Update layout
    fig.update_layout(
        title=dict(
            text=f"Device Ranking Ladder (Top {top_n})",
            x=0.5,
            y=0.95,
            font=dict(size=16, family="Arial, sans-serif", color="rgba(0,0,0,1.0)"),
        ),
        xaxis=dict(
            title=dict(text="Rating", font=axis_title_font),
            showgrid=True,
            gridwidth=1,
            gridcolor="rgba(0,0,0,0.1)",
            autorange="reversed",  # Reverse x-axis to show highest values on left
        ),
        yaxis=dict(
            title=dict(text="Rank", font=axis_title_font),
            showgrid=True,
            gridwidth=1,
            gridcolor="rgba(0,0,0,0.1)",
            tickmode="array",
            tickvals=rank_positions,
            ticktext=[f"#{pos}" for pos in rank_positions],
            autorange="reversed",  # This will put rank 1 at the top
        ),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            font=dict(size=12, family="Arial, sans-serif", color="rgba(0,0,0,1.0)"),
        ),
        # Wide right margin leaves room for the "middle right" device labels.
        margin=dict(t=100, l=50, r=200, b=50),
        height=800,
        hovermode="closest",
        paper_bgcolor="rgba(255,255,255,1)",  # Pure white background
        plot_bgcolor="rgba(255,255,255,1)",  # Pure white plot area
    )

    return fig
|
492 |
+
|
493 |
+
|
494 |
def render_device_rankings(df: pd.DataFrame):
|
495 |
"""Render device rankings using Glicko-2 algorithm."""
|
496 |
if df.empty:
|
|
|
608 |
g2_confident_display.index = g2_confident_display.index + 1
|
609 |
g2_confident_display = g2_confident_display.rename_axis("Rank")
|
610 |
|
611 |
+
# Display the ranking ladder
|
612 |
+
st.plotly_chart(
|
613 |
+
create_ranking_ladder(g2_confident_display, top_n=20),
|
614 |
+
use_container_width=True,
|
615 |
+
)
|
616 |
+
|
617 |
# Display the table
|
618 |
st.dataframe(
|
619 |
g2_confident_display[
|
|
|
632 |
hide_index=False,
|
633 |
)
|
634 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
else:
|
636 |
st.warning(
|
637 |
"No confident rankings available. Try adjusting the minimum matches threshold."
|