import json
import os
import numpy as np
import pandas as pd

from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn, AREA_DEFINITIONS, AREA_AVG_COLUMN_MAP, fields, PLUE_GROUP_AREAS
from src.leaderboard.read_evals import get_raw_eval_results
from src.about import Tasks


def get_leaderboard_df(results_path: str = None, requests_path: str = None, cols: list = None, initial_df: pd.DataFrame = None) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results or uses a provided initial DataFrame."""
    
    if initial_df is not None:
        df = initial_df.copy()  # Work on a copy of the provided initial DataFrame
        print("Using the provided initial DataFrame.")
    elif results_path and requests_path:
        print(f"Reading results from: {results_path}")
        raw_data = get_raw_eval_results(results_path, requests_path)
        all_data_json = [v.to_dict() for v in raw_data]
        df = pd.DataFrame.from_records(all_data_json)
    else:
        print("Erro: Nenhum DataFrame inicial nem caminhos de resultados fornecidos.")
        return pd.DataFrame() # Retorna DataFrame vazio se não houver dados

    # Make sure the task columns exist before computing averages
    # (Optional: add logic to handle DataFrames that already contain the computed averages)
    tasks_in_df = [task.name for task in Tasks if task.name in df.columns]
    print(f"Tasks found in the DataFrame: {tasks_in_df}")

    # Compute per-area averages
    for area_name, tasks_in_area in AREA_DEFINITIONS.items():
        # Use task.name, which is the internal key / column name in the df
        area_cols = [task.name for task in tasks_in_area if task.name in df.columns]
        avg_col_name = AREA_AVG_COLUMN_MAP[area_name]
        if area_cols:
            # Treat 0 as missing: replace 0 with NaN before averaging, skipping NaNs
            df[avg_col_name] = df[area_cols].replace(0, np.nan).mean(axis=1, skipna=True)
            print(f"Computed average for {area_name} using columns: {area_cols}")
        else:
            df[avg_col_name] = np.nan
            print(f"No columns found for {area_name}, setting its average to NaN.")
    
    # Compute the PLUE average
    plue_avg_cols_to_consider = [
        AREA_AVG_COLUMN_MAP[area]
        for area in PLUE_GROUP_AREAS
        if area in AREA_AVG_COLUMN_MAP and AREA_AVG_COLUMN_MAP[area] in df.columns
    ]
    if plue_avg_cols_to_consider:
        # Replace 0 with NaN before computing the PLUE average
        df[AutoEvalColumn.plue_avg.name] = df[plue_avg_cols_to_consider].replace(0, np.nan).mean(axis=1, skipna=True)
        print(f"Computed PLUE average using columns: {plue_avg_cols_to_consider}")
    else:
        df[AutoEvalColumn.plue_avg.name] = np.nan
        print("No PLUE area-average columns found, setting PLUE average to NaN.")

    # Compute the overall average (based on the averages of ALL areas)
    avg_area_cols = [col for col in AREA_AVG_COLUMN_MAP.values() if col in df.columns]
    if avg_area_cols:
        # Replace 0 with NaN before computing the overall average
        df[AutoEvalColumn.average.name] = df[avg_area_cols].replace(0, np.nan).mean(axis=1, skipna=True)
        print(f"Computed overall average using columns: {avg_area_cols}")
    else:
        df[AutoEvalColumn.average.name] = np.nan
        print("No area-average columns found, setting overall average to NaN.")

    # Sort by the overall average
    if AutoEvalColumn.average.name in df.columns:
        df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

    # Only round the existing numeric values
    df = df.round(decimals=2)

    # Replace NaN with "-" for display
    df = df.fillna('-')

    print(f"Columns returned by get_leaderboard_df: {df.columns.tolist()}")
    return df


def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the different dataframes for the evaluation queue requests."""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        if ".json" in entry:
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)

            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:
            # this is a folder
            # Check file existence relative to the sub-folder, not the current working directory
            sub_entries = [
                e
                for e in os.listdir(os.path.join(save_path, entry))
                if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")
            ]
            for sub_entry in sub_entries:
                file_path = os.path.join(save_path, entry, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)

                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]
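

# --- Illustrative usage sketch (a minimal, hypothetical example; not part of the original module) ---
# The folder names below ("eval-results", "eval-queue") are assumptions for demonstration only;
# in the Space the actual paths come from src.envs and the column lists from src.display.utils.
if __name__ == "__main__":
    # Build the leaderboard table from local result/request folders.
    leaderboard_df = get_leaderboard_df(results_path="eval-results", requests_path="eval-queue")
    print(leaderboard_df.head())

    # Split the evaluation queue into finished / running / pending tables.
    queue_cols = [c.name for c in fields(EvalQueueColumn)]
    finished_df, running_df, pending_df = get_evaluation_queue_df("eval-queue", queue_cols)
    print(f"{len(finished_df)} finished, {len(running_df)} running, {len(pending_df)} pending")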