import json
import os
import numpy as np
import pandas as pd
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn, AREA_DEFINITIONS, AREA_AVG_COLUMN_MAP, fields, PLUE_GROUP_AREAS
from src.leaderboard.read_evals import get_raw_eval_results
from src.about import Tasks
def get_leaderboard_df(results_path: str = None, requests_path: str = None, cols: list = None, initial_df: pd.DataFrame = None) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results, or uses a provided initial DataFrame."""
    if initial_df is not None:
        df = initial_df.copy()  # Work on a copy of the provided initial DataFrame
        print("Using provided initial DataFrame.")
    elif results_path and requests_path:
        print(f"Reading results from: {results_path}")
        raw_data = get_raw_eval_results(results_path, requests_path)
        all_data_json = [v.to_dict() for v in raw_data]
        df = pd.DataFrame.from_records(all_data_json)
    else:
        print("Error: neither an initial DataFrame nor result paths were provided.")
        return pd.DataFrame()  # Return an empty DataFrame when there is no data

    # Make sure the task columns exist before computing averages
    # (Optional: add logic to handle DataFrames that already contain computed averages)
    tasks_in_df = [task.name for task in Tasks if task.name in df.columns]
    print(f"Tasks found in the DataFrame: {tasks_in_df}")

    # Compute per-area averages
    for area_name, tasks_in_area in AREA_DEFINITIONS.items():
        # Use task.name, which is the internal key / column name in the df
        area_cols = [task.name for task in tasks_in_area if task.name in df.columns]
        avg_col_name = AREA_AVG_COLUMN_MAP[area_name]
        if area_cols:
            # Handle possible NaNs, replacing 0 with NaN in the columns before averaging
            df[avg_col_name] = df[area_cols].replace(0, np.nan).mean(axis=1, skipna=True)
            print(f"Computed average for {area_name} using columns: {area_cols}")
        else:
            df[avg_col_name] = np.nan
            print(f"No columns found for {area_name}, setting average to NaN.")

    # Compute the PLUE average
    plue_avg_cols_to_consider = [
        AREA_AVG_COLUMN_MAP[area]
        for area in PLUE_GROUP_AREAS
        if area in AREA_AVG_COLUMN_MAP and AREA_AVG_COLUMN_MAP[area] in df.columns
    ]
    if plue_avg_cols_to_consider:
        # Replace 0 with NaN before computing the PLUE average
        df[AutoEvalColumn.plue_avg.name] = df[plue_avg_cols_to_consider].replace(0, np.nan).mean(axis=1, skipna=True)
        print(f"Computed PLUE average using columns: {plue_avg_cols_to_consider}")
    else:
        df[AutoEvalColumn.plue_avg.name] = np.nan
        print("No PLUE area-average columns found, setting PLUE average to NaN.")

    # Compute the overall average (based on the averages of ALL areas)
    avg_area_cols = [col for col in AREA_AVG_COLUMN_MAP.values() if col in df.columns]
    if avg_area_cols:
        # Replace 0 with NaN before computing the overall average
        df[AutoEvalColumn.average.name] = df[avg_area_cols].replace(0, np.nan).mean(axis=1, skipna=True)
        print(f"Computed overall average using columns: {avg_area_cols}")
    else:
        df[AutoEvalColumn.average.name] = np.nan
        print("No area-average columns found, setting overall average to NaN.")

    # Sort by the overall average
    if AutoEvalColumn.average.name in df.columns:
        df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

    # Round only the existing numeric values
    df = df.round(decimals=2)
    # Replace NaN with "-" for display
    df = df.fillna('-')

    print(f"Columns returned by get_leaderboard_df: {df.columns.tolist()}")  # Log for debugging
    return df
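
# A minimal sketch (not part of the leaderboard pipeline) of how the
# zero-as-missing averaging above behaves. The column names "task_a" and
# "task_b" are hypothetical, not actual Tasks entries.
def _demo_zero_as_missing_average() -> pd.Series:
    """Illustrative only: 0 scores are treated as missing before averaging."""
    demo = pd.DataFrame({"task_a": [80.0, 0.0], "task_b": [60.0, 50.0]})
    # Row 0 -> (80 + 60) / 2 = 70.0; row 1 -> 50.0, since the 0 is dropped
    # rather than pulling the average down to 25.0.
    return demo.replace(0, np.nan).mean(axis=1, skipna=True)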
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
    """Creates the different dataframes for the evaluation queue requests."""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        if ".json" in entry:
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)

            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:
            # This is a folder: collect the request files inside it
            folder_path = os.path.join(save_path, entry)
            sub_entries = [
                e for e in os.listdir(folder_path)
                if os.path.isfile(os.path.join(folder_path, e)) and not e.startswith(".")
            ]
            for sub_entry in sub_entries:
                file_path = os.path.join(folder_path, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)

                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]
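
# A minimal usage sketch, assuming the queue lives in a local "eval-queue"
# directory (a placeholder path) and that the display columns come from
# fields(EvalQueueColumn), as imported above.
if __name__ == "__main__":
    queue_cols = [c.name for c in fields(EvalQueueColumn)]
    finished_df, running_df, pending_df = get_evaluation_queue_df("eval-queue", queue_cols)
    print(f"Finished: {len(finished_df)}, running: {len(running_df)}, pending: {len(pending_df)}")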