agent-leaderboard / utils.py
Pratik Bhavsar
working draft
10ad72f
raw
history blame
2.31 kB
from data_loader import CATEGORIES
from visualization import (
create_radar_plot,
get_performance_chart,
get_performance_cost_chart,
)
def model_info_tab(df, model_names=None):
    """Build the model-details view: an HTML summary table plus a radar plot.

    Args:
        df: Leaderboard DataFrame. This function reads the "Model",
            "Model Type", "Model Avg", "IO Cost", "single turn perf" and
            "multi turn perf" columns.
        model_names: Collection of model names to display. When it is None
            or empty, the single model with the highest "Model Avg" is
            used instead.

    Returns:
        A tuple ``(info_html, radar_chart)``: the HTML table for the selected
        rows (index omitted) and whatever ``create_radar_plot`` returns for
        the full frame and the selected names.
    """
    nothing_selected = model_names is None or len(model_names) == 0
    if nothing_selected:
        # Fall back to the top-ranked model by overall average.
        ranked = df.sort_values("Model Avg", ascending=False)
        model_names = [ranked.iloc[0]["Model"]]

    summary_columns = [
        "Model",
        "Model Type",
        "Model Avg",
        "IO Cost",
        "single turn perf",
        "multi turn perf",
    ]

    is_selected = df["Model"].isin(model_names)
    selected_rows = df[is_selected]

    # The plot helper receives the unfiltered frame together with the names.
    radar_chart = create_radar_plot(df, model_names)

    info_html = selected_rows[summary_columns].to_html(index=False)
    return info_html, radar_chart
def filter_leaderboard(df, model_type, category, sort_by):
    """Filter, score, rank and render the leaderboard for one category.

    Args:
        df: Leaderboard DataFrame. This function reads "Model",
            "Model Type", "IO Cost", "Input cost per million token",
            "Output cost per million token" and the columns that
            ``CATEGORIES`` maps ``category`` to. All column assignments are
            made on a copy, never on ``df`` itself.
        model_type: ``"All"`` keeps every row; any other value keeps only
            rows whose whitespace-stripped "Model Type" equals it.
        category: Key looked up in ``CATEGORIES``; an unknown key falls back
            to ``["Model Avg"]``.
        sort_by: ``"Performance"`` sorts by "Category Score" descending;
            any other value sorts by "IO Cost" ascending.

    Returns:
        A tuple ``(table_html, perf_chart, cost_chart)``: the HTML table
        (with the explanatory note appended) and the results of
        ``get_performance_chart`` and ``get_performance_cost_chart``.
    """
    # Filter first and copy the result, so the column assignments below act
    # on an independent frame rather than on a selection of another frame
    # (which makes pandas emit SettingWithCopyWarning).
    if model_type == "All":
        filtered_df = df.copy()
    else:
        type_matches = df["Model Type"].str.strip() == model_type
        filtered_df = df[type_matches].copy()

    # Category score is the row-wise mean over the category's columns.
    dataset_columns = CATEGORIES.get(category, ["Model Avg"])
    filtered_df["Category Score"] = filtered_df[dataset_columns].mean(axis=1)

    if sort_by == "Performance":
        filtered_df = filtered_df.sort_values(by="Category Score", ascending=False)
    else:
        filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)

    # Rank follows the sort order chosen above, starting at 1.
    filtered_df["Rank"] = range(1, len(filtered_df) + 1)

    # Charts are built before the display-only cost column is added.
    perf_chart = get_performance_chart(filtered_df, category)
    cost_chart = get_performance_cost_chart(filtered_df, category)

    # Build the cost labels column-wise. A row-wise DataFrame.apply returns a
    # DataFrame (not a Series) when the frame has no rows, which makes the
    # single-column assignment fail for a filter that matches nothing.
    filtered_df["Cost (Input/Output)"] = [
        f"${input_cost:.2f}/${output_cost:.2f}"
        for input_cost, output_cost in zip(
            filtered_df["Input cost per million token"].tolist(),
            filtered_df["Output cost per million token"].tolist(),
        )
    ]

    display_columns = [
        "Rank",
        "Model",
        "Model Type",
        "Cost (Input/Output)",
        "Category Score",
    ]
    # escape=False is kept from the original: cell contents are emitted as
    # raw HTML, so the data is assumed to be trusted.
    table_html = filtered_df[display_columns].to_html(index=False, escape=False)

    note_html = """
<div style='margin-top: 20px; padding: 10px; background-color: #f3f4f6; border-radius: 4px;'>
<p style='margin: 0; font-size: 0.9em; color: #4b5563;'>
Note: Cost for sorting is calculated using 3:1 ratio on I/O. Cost of Gemini 2.0 is assumed to be same as that of Gemini 1.5.
</p>
</div>
"""
    table_html += note_html
    return table_html, perf_chart, cost_chart