# Source: Hugging Face Space galileo-ai/agent-leaderboard (running on CPU Upgrade). File size: 6,831 bytes.

import gradio as gr

from data_loader import CATEGORIES, DESCRIPTION_HTML, CARDS
from visualization import (
    get_performance_chart,
    get_performance_cost_chart,
)
from utils import (
    get_rank_badge,
    get_score_bar,
    get_type_badge,
)

def filter_leaderboard(df, model_type, category, sort_by):
    """Filter, score, rank and render the leaderboard for the chosen filters.

    Args:
        df: Leaderboard DataFrame. The columns read here are "Model Type",
            the dataset columns of the selected category (or "Model Avg"),
            "IO Cost", "Model", "Vendor", "Input cost per million token" and
            "Output cost per million token". The caller's frame is never
            modified; all work happens on a copy.
        model_type: Value to keep from the "Model Type" column, or "All" to
            keep every row. Surrounding whitespace is ignored on both sides
            of the comparison.
        category: Key into ``CATEGORIES`` selecting the dataset columns that
            are averaged into "Category Score". Unknown keys fall back to
            the single column "Model Avg".
        sort_by: "Performance" sorts by "Category Score" descending; any
            other value sorts by "IO Cost" ascending.

    Returns:
        A ``(table_html, perf_chart, cost_chart)`` tuple: the styled HTML
        table as a string, followed by the results of
        ``get_performance_chart`` and ``get_performance_cost_chart`` for the
        filtered, ranked frame.
    """
    if model_type != "All":
        # The column is compared stripped, so strip the requested value as
        # well; otherwise a choice carrying stray whitespace never matches.
        wanted_type = model_type.strip() if isinstance(model_type, str) else model_type
        type_mask = df["Model Type"].str.strip() == wanted_type
        # Copy *after* filtering so the column assignments below operate on an
        # independent frame rather than on a slice of another frame.
        filtered_df = df[type_mask].copy()
    else:
        filtered_df = df.copy()

    dataset_columns = CATEGORIES.get(category, ["Model Avg"])
    filtered_df["Category Score"] = filtered_df[dataset_columns].mean(axis=1)

    if sort_by == "Performance":
        filtered_df = filtered_df.sort_values(by="Category Score", ascending=False)
    else:
        filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)

    # Rank follows the sort order chosen above (1 = first row).
    filtered_df["Rank"] = range(1, len(filtered_df) + 1)
    perf_chart = get_performance_chart(filtered_df, category)
    cost_chart = get_performance_cost_chart(filtered_df, category)

    # Styles, pricing note and table header; the table body is appended below.
    table_header = f"""
    <style>
        @media (prefers-color-scheme: dark) {{
            :root {{
                --bg-color: #1a1b1e;
                --text-color: #ffffff;
                --border-color: #2d2e32;
                --hover-bg: #2d2e32;
                --note-bg: #2d2e32;
                --note-text: #a1a1aa;
                --accent-blue: #60A5FA;
                --accent-purple: #A78BFA;
                --accent-pink: #F472B6;
                --score-bg: rgba(255, 255, 255, 0.1);
            }}
        }}
        
        @media (prefers-color-scheme: light) {{
            :root {{
                --bg-color: #ffffff;
                --text-color: #000000;
                --border-color: #e5e7eb;
                --hover-bg: #f3f4f6;
                --note-bg: #f3f4f6;
                --note-text: #4b5563;
                --accent-blue: #3B82F6;
                --accent-purple: #8B5CF6;
                --accent-pink: #EC4899;
                --score-bg: rgba(0, 0, 0, 0.1);
            }}
        }}
        
        .dark-table-container {{
            background: var(--bg-color);
            border-radius: 12px;
            padding: 1px;
            margin: 20px 0;
        }}
        
        .dark-styled-table {{
            width: 100%;
            border-collapse: collapse;
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
            background: var(--bg-color);
            color: var(--text-color);
        }}
        
        .dark-styled-table thead {{
            position: sticky;
            top: 0;
            background: var(--bg-color);
            z-index: 1;
        }}
        
        .dark-styled-table th {{
            padding: 16px;
            text-align: left;
            font-weight: 500;
            color: var(--text-color);
            border-bottom: 1px solid var(--border-color);
        }}
        
        .dark-styled-table td {{
            padding: 16px;
            border-bottom: 1px solid var(--border-color);
            color: var(--text-color);
        }}
        
        .dark-styled-table tbody tr:hover {{
            background: var(--hover-bg);
        }}
        
        .model-cell {{
            font-weight: 500;
        }}
        
        .score-cell {{
            font-weight: 500;
        }}
        
        .note-box {{
            margin-top: 20px;
            padding: 16px;
            background: var(--note-bg);
            border-radius: 8px;
            color: var(--note-text);
        }}
    </style>
    
    <div class="note-box">
        <p style="margin: 0; font-size: 1em;">
            Note: API pricing for sorting by cost uses a 3-to-1 input/output ratio calculation. DeepSeek V3 and R1 were excluded from rankings due to limited function support.  Pricing for Gemini models shown reflects <a href="https://cloud.google.com/vertex-ai/generative-ai/pricing">Vertex AI</a>. Google AI Studio offers <a href="https://ai.google.dev/gemini-api/docs/pricing">Gemini API Access</a> at a lower cost with an API Key.
            
        </p>
    </div>
    
    <div class="dark-table-container">
        <table class="dark-styled-table">
            <thead>
                <tr>
                    <th>Rank</th>
                    <th>Model</th>
                    <th>Type</th>
                    <th>Vendor</th>
                    <th>Cost (I/O)</th>
                    <th>Avg Category Score (TSQ)</th>
                </tr>
            </thead>
            <tbody>
    """

    # One <tr> per model, joined in a single pass instead of repeated `+=`.
    table_rows = "".join(
        f"""
            <tr>
                <td>{get_rank_badge(row['Rank'])}</td>
                <td class="model-cell">{row['Model']}</td>
                <td>{get_type_badge(row['Model Type'])}</td>
                <td class="vendor-cell">{row['Vendor']}</td>
                <td>${row['Input cost per million token']:.2f}/${row['Output cost per million token']:.2f}</td>
                <td class="score-cell">{get_score_bar(row['Category Score'])}</td>
            </tr>
        """
        for _, row in filtered_df.iterrows()
    )

    # Close every element opened in the header so the fragment is well-formed
    # HTML instead of relying on the browser's error recovery.
    table_footer = """
            </tbody>
        </table>
    </div>
    """

    table_html = table_header + table_rows + table_footer
    return table_html, perf_chart, cost_chart


def create_leaderboard_tab(df, CATEGORIES, METHODOLOGY, HEADER_CONTENT, CARDS):
    """Build the "Leaderboard" tab and wire its filters to the table and plots.

    Args:
        df: Leaderboard DataFrame; its "Model Type" column populates the
            model-type dropdown and the frame is passed to
            ``filter_leaderboard`` on every filter change.
        CATEGORIES: Mapping whose keys become the category dropdown choices;
            the first key is the initial selection. Shadows the module-level
            import of the same name inside this function.
        METHODOLOGY: HTML string rendered at the bottom of the tab.
        HEADER_CONTENT: HTML string rendered at the top of the tab.
        CARDS: HTML string appended to ``HEADER_CONTENT``. Shadows the
            module-level import of the same name inside this function.

    Returns:
        The ``(output, plot1, plot2)`` components: the HTML table holder and
        the two plot holders that the filter callbacks update.
    """

    def _refresh(selected_type, selected_category, selected_sort):
        # Bind the frame so Gradio only has to supply the three filter values.
        return filter_leaderboard(df, selected_type, selected_category, selected_sort)

    with gr.Tab("Leaderboard"):
        gr.HTML(HEADER_CONTENT + CARDS)
        # DESCRIPTION_HTML is the module-level import, not a parameter.
        gr.HTML(DESCRIPTION_HTML)

        category_names = list(CATEGORIES.keys())
        type_choices = ["All", *df["Model Type"].unique().tolist()]

        # Filter controls, laid out side by side.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                model_type = gr.Dropdown(
                    choices=type_choices,
                    value="All",
                    label="Model Type",
                )
            with gr.Column(scale=1):
                category = gr.Dropdown(
                    choices=category_names,
                    value=category_names[0],
                    label="Category",
                )
            with gr.Column(scale=1):
                sort_by = gr.Radio(
                    choices=["Performance", "Cost"],
                    value="Performance",
                    label="Sort by",
                )

        # Placeholders filled in by the filter callback.
        table_view = gr.HTML()
        performance_plot = gr.Plot()
        cost_plot = gr.Plot()

        gr.HTML(
            """<div class="note-box">
                <p style="margin: 0; font-size: 1em;">
                    Note: API pricing for sorting by cost uses a 3-to-1 input/output ratio calculation.            
                </p>
            </div>"""
        )

        gr.HTML(METHODOLOGY)

        # Changing any one control re-renders with the current value of all three.
        controls = [model_type, category, sort_by]
        targets = [table_view, performance_plot, cost_plot]
        for control in controls:
            control.change(fn=_refresh, inputs=controls, outputs=targets)

        return table_view, performance_plot, cost_plot