chore: clean up device ranking tab
Browse files
- src/app.py +34 -5
- src/components/visualizations.py +38 -30
src/app.py
CHANGED
@@ -156,11 +156,40 @@ async def main():
|
|
156 |
# Device rankings view
|
157 |
st.title(" Device Rankings")
|
158 |
|
159 |
-
#
|
160 |
-
st.
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
)
|
165 |
|
166 |
# Render performance metrics
|
|
|
156 |
# Device rankings view
|
157 |
st.title(" Device Rankings")
|
158 |
|
159 |
+
# Footnote-style information
|
160 |
+
st.markdown(
|
161 |
+
"""
|
162 |
+
<div style="position: relative;">
|
163 |
+
<div style="margin-bottom: 10px;">
|
164 |
+
<a href="#" data-tooltip="Rankings calculated using Glicko-2 algorithm with standardized conditions: PP=512 tokens, TG=128 tokens" style="text-decoration: none; color: #888; font-size: 12px; border-bottom: 1px dotted #888;">
|
165 |
+
ℹ️ Ranking methodology
|
166 |
+
</a>
|
167 |
+
</div>
|
168 |
+
</div>
|
169 |
+
<style>
|
170 |
+
[data-tooltip] {
|
171 |
+
position: relative;
|
172 |
+
cursor: pointer;
|
173 |
+
}
|
174 |
+
[data-tooltip]:hover::after {
|
175 |
+
content: attr(data-tooltip);
|
176 |
+
position: absolute;
|
177 |
+
left: 0;
|
178 |
+
top: 100%;
|
179 |
+
background-color: #f8f9fa;
|
180 |
+
border: 1px solid #dee2e6;
|
181 |
+
border-radius: 4px;
|
182 |
+
padding: 8px 12px;
|
183 |
+
width: max-content;
|
184 |
+
max-width: 300px;
|
185 |
+
z-index: 100;
|
186 |
+
font-size: 12px;
|
187 |
+
color: #333;
|
188 |
+
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
189 |
+
}
|
190 |
+
</style>
|
191 |
+
""",
|
192 |
+
unsafe_allow_html=True,
|
193 |
)
|
194 |
|
195 |
# Render performance metrics
|
src/components/visualizations.py
CHANGED
@@ -288,7 +288,7 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
|
|
288 |
grouped_df[display_cols],
|
289 |
use_container_width=True,
|
290 |
height=min(
|
291 |
-
|
292 |
), # Dynamic height based on content
|
293 |
hide_index=False,
|
294 |
column_config={
|
@@ -443,12 +443,12 @@ def create_ranking_ladder(g2_confident_display: pd.DataFrame, top_n: int = 30):
|
|
443 |
|
444 |
# Update layout
|
445 |
fig.update_layout(
|
446 |
-
title=dict(
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
),
|
452 |
xaxis=dict(
|
453 |
title="Rating",
|
454 |
showgrid=True,
|
@@ -529,7 +529,7 @@ def render_device_rankings(df: pd.DataFrame):
|
|
529 |
with col3:
|
530 |
st.metric("Total Models", total_models)
|
531 |
|
532 |
-
st.markdown("---")
|
533 |
|
534 |
# Display confident rankings
|
535 |
if not g2_confident.empty:
|
@@ -609,29 +609,37 @@ def render_device_rankings(df: pd.DataFrame):
|
|
609 |
g2_confident_display.index = g2_confident_display.index + 1
|
610 |
g2_confident_display = g2_confident_display.rename_axis("Rank")
|
611 |
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
# Display the table
|
619 |
-
st.dataframe(
|
620 |
-
g2_confident_display[
|
621 |
-
[
|
622 |
-
"Device",
|
623 |
-
"Platform",
|
624 |
-
"Rating",
|
625 |
-
"Rating Deviation",
|
626 |
-
"Token Rating",
|
627 |
-
"Prompt Rating",
|
628 |
-
"Model Size Range",
|
629 |
-
]
|
630 |
-
],
|
631 |
-
use_container_width=True,
|
632 |
-
height=min(600, (len(g2_confident_display) + 1) * 35 + 40),
|
633 |
-
hide_index=False,
|
634 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
|
636 |
else:
|
637 |
st.warning(
|
|
|
288 |
grouped_df[display_cols],
|
289 |
use_container_width=True,
|
290 |
height=min(
|
291 |
+
600, (len(grouped_df) + 1) * 35 + 40
|
292 |
), # Dynamic height based on content
|
293 |
hide_index=False,
|
294 |
column_config={
|
|
|
443 |
|
444 |
# Update layout
|
445 |
fig.update_layout(
|
446 |
+
# title=dict(
|
447 |
+
# text=f"Device Ranking Ladder (Top {top_n})",
|
448 |
+
# x=0.4,
|
449 |
+
# y=0.95,
|
450 |
+
# font=dict(size=16, family="Arial, sans-serif", color="rgba(0,0,0,1.0)"),
|
451 |
+
# ),
|
452 |
xaxis=dict(
|
453 |
title="Rating",
|
454 |
showgrid=True,
|
|
|
529 |
with col3:
|
530 |
st.metric("Total Models", total_models)
|
531 |
|
532 |
+
# st.markdown("---")
|
533 |
|
534 |
# Display confident rankings
|
535 |
if not g2_confident.empty:
|
|
|
609 |
g2_confident_display.index = g2_confident_display.index + 1
|
610 |
g2_confident_display = g2_confident_display.rename_axis("Rank")
|
611 |
|
612 |
+
tab1, tab2 = st.tabs(
|
613 |
+
[
|
614 |
+
"Performance Ladder",
|
615 |
+
"Performance Table",
|
616 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
617 |
)
|
618 |
+
with tab1:
|
619 |
+
# Display the ranking ladder
|
620 |
+
st.plotly_chart(
|
621 |
+
create_ranking_ladder(g2_confident_display, top_n=30),
|
622 |
+
use_container_width=True,
|
623 |
+
)
|
624 |
+
|
625 |
+
with tab2:
|
626 |
+
# Display the table
|
627 |
+
st.dataframe(
|
628 |
+
g2_confident_display[
|
629 |
+
[
|
630 |
+
"Device",
|
631 |
+
"Platform",
|
632 |
+
"Rating",
|
633 |
+
"Rating Deviation",
|
634 |
+
"Token Rating",
|
635 |
+
"Prompt Rating",
|
636 |
+
"Model Size Range",
|
637 |
+
]
|
638 |
+
],
|
639 |
+
use_container_width=True,
|
640 |
+
height=min(600, (len(g2_confident_display) + 1) * 35 + 40),
|
641 |
+
hide_index=False,
|
642 |
+
)
|
643 |
|
644 |
else:
|
645 |
st.warning(
|