Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -256,20 +256,20 @@ with demo:
|
|
256 |
|
257 |
# 1. Benchmark Geral
|
258 |
with gr.TabItem("📊 Benchmark Geral", id=tab_index):
|
259 |
-
# Colunas a exibir: T, Modelo, Média Geral, PLUE
|
260 |
general_cols_to_display = [
|
261 |
AutoEvalColumn.model_type_symbol.name, # T
|
262 |
AutoEvalColumn.model.name, # Modelo
|
263 |
AutoEvalColumn.average.name, # Média Geral
|
264 |
AutoEvalColumn.plue_avg.name, # Média PLUE
|
265 |
-
#
|
266 |
-
# AutoEvalColumn.
|
267 |
-
# AutoEvalColumn.
|
268 |
-
# AutoEvalColumn.semantica_inferencia_avg.name,\
|
269 |
]
|
270 |
general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
|
271 |
|
272 |
-
# Colunas a ocultar:
|
|
|
273 |
general_hidden_cols = [task.name for task in Tasks] + \
|
274 |
list(AREA_AVG_COLUMN_MAP.values()) + \
|
275 |
[
|
@@ -291,69 +291,34 @@ with demo:
|
|
291 |
)
|
292 |
tab_index += 1
|
293 |
|
294 |
-
# 2. PLUE
|
295 |
with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
|
296 |
-
# --- Lógica interna da aba PLUE (
|
297 |
gr.Markdown("## Selecione a visualização PLUE:")
|
298 |
-
#
|
299 |
-
all_plue_options = [PLUE_GENERAL_VIEW_NAME] + sorted(PLUE_GROUP_AREAS)
|
300 |
plue_dropdown = gr.Dropdown(
|
301 |
choices=all_plue_options,
|
302 |
label="Visualização PLUE",
|
303 |
value=PLUE_GENERAL_VIEW_NAME
|
304 |
)
|
305 |
|
306 |
-
# Função auxiliar (
|
307 |
def get_plue_leaderboard_config(selected_option):
|
308 |
if selected_option == PLUE_GENERAL_VIEW_NAME:
|
309 |
-
|
310 |
-
|
311 |
-
AutoEvalColumn.model_type_symbol.name,
|
312 |
-
AutoEvalColumn.model.name,
|
313 |
-
] + [AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP]
|
314 |
-
hidden_cols = [task.name for task in Tasks] + \
|
315 |
-
[AutoEvalColumn.average.name] + \
|
316 |
-
[
|
317 |
-
AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
|
318 |
-
AutoEvalColumn.model_type.name,
|
319 |
-
AutoEvalColumn.architecture.name,
|
320 |
-
AutoEvalColumn.weight_type.name,
|
321 |
-
AutoEvalColumn.precision.name,
|
322 |
-
AutoEvalColumn.license.name,
|
323 |
-
AutoEvalColumn.params.name,
|
324 |
-
AutoEvalColumn.likes.name,
|
325 |
-
AutoEvalColumn.still_on_hub.name,
|
326 |
-
AutoEvalColumn.revision.name
|
327 |
-
]
|
328 |
title = PLUE_GENERAL_VIEW_NAME
|
329 |
else:
|
330 |
-
# Lógica para área específica (inalterada)
|
331 |
selected_area = selected_option
|
332 |
tasks_in_area = AREA_DEFINITIONS[selected_area]
|
333 |
-
displayed_cols = [
|
334 |
-
|
335 |
-
AutoEvalColumn.model.name,
|
336 |
-
] + [task.name for task in tasks_in_area]
|
337 |
-
hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + \
|
338 |
-
[task.name for task in Tasks if task not in tasks_in_area] + \
|
339 |
-
[
|
340 |
-
AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
|
341 |
-
AutoEvalColumn.average.name, # Ocultar média geral aqui também
|
342 |
-
AutoEvalColumn.model_type.name,
|
343 |
-
AutoEvalColumn.architecture.name,
|
344 |
-
AutoEvalColumn.weight_type.name,
|
345 |
-
AutoEvalColumn.precision.name,
|
346 |
-
AutoEvalColumn.license.name,
|
347 |
-
AutoEvalColumn.params.name,
|
348 |
-
AutoEvalColumn.likes.name,
|
349 |
-
AutoEvalColumn.still_on_hub.name,
|
350 |
-
AutoEvalColumn.revision.name
|
351 |
-
]
|
352 |
title = selected_area
|
353 |
final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
|
354 |
return displayed_cols, final_hidden_cols, title
|
355 |
|
356 |
-
# Pré-renderização (ATUALIZAR loop com novas all_plue_options)
|
357 |
plue_containers = {}
|
358 |
for option in all_plue_options:
|
359 |
displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
|
@@ -361,7 +326,7 @@ with demo:
|
|
361 |
with gr.Group(visible=is_visible) as plue_containers[option]:
|
362 |
create_leaderboard_component(LEADERBOARD_DF, displayed_cols=displayed_cols, hidden_cols=hidden_cols, title=title)
|
363 |
|
364 |
-
# Função de callback (
|
365 |
def switch_plue_view(selected_option):
|
366 |
update_list = []
|
367 |
for option in all_plue_options:
|
@@ -373,8 +338,32 @@ with demo:
|
|
373 |
# --- Fim Lógica PLUE ---
|
374 |
tab_index += 1
|
375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
# 5. Submit
|
377 |
-
with gr.TabItem("
|
378 |
with gr.Column():
|
379 |
with gr.Row():
|
380 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
|
|
256 |
|
257 |
# 1. Benchmark Geral
|
258 |
with gr.TabItem("📊 Benchmark Geral", id=tab_index):
|
259 |
+
# Colunas a exibir: T, Modelo, Média Geral, PLUE
|
260 |
general_cols_to_display = [
|
261 |
AutoEvalColumn.model_type_symbol.name, # T
|
262 |
AutoEvalColumn.model.name, # Modelo
|
263 |
AutoEvalColumn.average.name, # Média Geral
|
264 |
AutoEvalColumn.plue_avg.name, # Média PLUE
|
265 |
+
# Adicionar médias Energy/Reasoning se desejado (opcional)
|
266 |
+
# AutoEvalColumn.energy_avg.name,
|
267 |
+
# AutoEvalColumn.reasoning_avg.name,
|
|
|
268 |
]
|
269 |
general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
|
270 |
|
271 |
+
# Colunas a ocultar: Tasks + Médias de Área Individuais (PLUE e não-PLUE) + detalhes
|
272 |
+
# (Garantir que Energy/Reasoning avg estão aqui se não forem exibidas)
|
273 |
general_hidden_cols = [task.name for task in Tasks] + \
|
274 |
list(AREA_AVG_COLUMN_MAP.values()) + \
|
275 |
[
|
|
|
291 |
)
|
292 |
tab_index += 1
|
293 |
|
294 |
+
# 2. PLUE (Agora apenas com as áreas originais + 3 adicionadas)
|
295 |
with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
|
296 |
+
# --- Lógica interna da aba PLUE (ajustada) ---
|
297 |
gr.Markdown("## Selecione a visualização PLUE:")
|
298 |
+
# RECALCULAR choices e options com base na PLUE_GROUP_AREAS atualizada (sem Energy/Reasoning)
|
299 |
+
all_plue_options = [PLUE_GENERAL_VIEW_NAME] + sorted(PLUE_GROUP_AREAS)
|
300 |
plue_dropdown = gr.Dropdown(
|
301 |
choices=all_plue_options,
|
302 |
label="Visualização PLUE",
|
303 |
value=PLUE_GENERAL_VIEW_NAME
|
304 |
)
|
305 |
|
306 |
+
# Função auxiliar (lógica interna não muda, mas opera sobre PLUE_GROUP_AREAS atualizada)
|
307 |
def get_plue_leaderboard_config(selected_option):
    """Build the column layout for one entry of the PLUE dropdown.

    Args:
        selected_option: Either ``PLUE_GENERAL_VIEW_NAME`` (overview of the
            per-area averages) or a key of ``AREA_DEFINITIONS`` (per-task
            view of that single area).

    Returns:
        A ``(displayed_cols, final_hidden_cols, title)`` tuple. The hidden
        list is restricted to names present in ``LEADERBOARD_DF.columns``;
        the displayed list is returned unfiltered.

    Note:
        Any option other than the general view is looked up with a plain
        subscript on ``AREA_DEFINITIONS``, so an unknown option propagates
        that lookup's error.
    """
    # Columns that identify a row; shown first in every PLUE view.
    identity_cols = [
        AutoEvalColumn.model_type_symbol.name,
        AutoEvalColumn.model.name,
    ]
    # Model-detail columns that are hidden in every PLUE view.
    detail_cols = [
        AutoEvalColumn.model_type.name,
        AutoEvalColumn.architecture.name,
        AutoEvalColumn.weight_type.name,
        AutoEvalColumn.precision.name,
        AutoEvalColumn.license.name,
        AutoEvalColumn.params.name,
        AutoEvalColumn.likes.name,
        AutoEvalColumn.still_on_hub.name,
        AutoEvalColumn.revision.name,
    ]

    if selected_option == PLUE_GENERAL_VIEW_NAME:
        # Overview: one average column per PLUE area that has a mapping.
        plue_area_avgs = [
            AREA_AVG_COLUMN_MAP[area]
            for area in PLUE_GROUP_AREAS
            if area in AREA_AVG_COLUMN_MAP
        ]
        non_plue_area_avgs = [
            avg_col
            for area, avg_col in AREA_AVG_COLUMN_MAP.items()
            if area not in PLUE_GROUP_AREAS
        ]
        displayed_cols = identity_cols + plue_area_avgs
        hidden_cols = (
            [task.name for task in Tasks]
            + non_plue_area_avgs
            + [AutoEvalColumn.average.name, AutoEvalColumn.plue_avg.name]
            + detail_cols
        )
        title = PLUE_GENERAL_VIEW_NAME
    else:
        # Single-area view: show that area's tasks, hide every aggregate.
        area_tasks = AREA_DEFINITIONS[selected_option]
        displayed_cols = identity_cols + [task.name for task in area_tasks]
        hidden_cols = (
            list(AREA_AVG_COLUMN_MAP.values())
            + [task.name for task in Tasks if task not in area_tasks]
            + [AutoEvalColumn.plue_avg.name, AutoEvalColumn.average.name]
            + detail_cols
        )
        title = selected_option

    # Only keep hidden names that exist in the dataframe.
    available = LEADERBOARD_DF.columns
    final_hidden_cols = [col for col in hidden_cols if col in available]
    return displayed_cols, final_hidden_cols, title
|
320 |
|
321 |
+
# Pré-renderização (ATUALIZAR loop e containers com novas all_plue_options)
|
322 |
plue_containers = {}
|
323 |
for option in all_plue_options:
|
324 |
displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
|
|
|
326 |
with gr.Group(visible=is_visible) as plue_containers[option]:
|
327 |
create_leaderboard_component(LEADERBOARD_DF, displayed_cols=displayed_cols, hidden_cols=hidden_cols, title=title)
|
328 |
|
329 |
+
# Função de callback (ATUALIZAR loop com novas all_plue_options)
|
330 |
def switch_plue_view(selected_option):
|
331 |
update_list = []
|
332 |
for option in all_plue_options:
|
|
|
338 |
# --- Fim Lógica PLUE ---
|
339 |
tab_index += 1
|
340 |
|
341 |
+
# 3. Energy
|
342 |
+
with gr.TabItem("⚡️ Energy", id=tab_index):
    # Leaderboard restricted to the tasks registered under the "Energy" area
    # (empty task list when AREA_DEFINITIONS has no such key).
    energy_tasks = AREA_DEFINITIONS.get("Energy", [])
    energy_cols = [
        AutoEvalColumn.model_type_symbol.name,
        AutoEvalColumn.model.name,
        *(t.name for t in energy_tasks),
    ]

    # Hidden list, assembled in three parts: tasks of other areas, all
    # aggregate averages, then every AutoEvalColumn field not displayed.
    energy_other_tasks = [t.name for t in Tasks if t not in energy_tasks]
    energy_aggregates = [
        *AREA_AVG_COLUMN_MAP.values(),
        AutoEvalColumn.plue_avg.name,
        AutoEvalColumn.average.name,
    ]
    # The symbol and model columns are already in energy_cols, so the
    # membership test alone excludes them.
    energy_details = [
        c.name for c in fields(AutoEvalColumn) if c.name not in energy_cols
    ]
    energy_hidden = energy_other_tasks + energy_aggregates + energy_details

    create_leaderboard_component(
        LEADERBOARD_DF,
        displayed_cols=energy_cols,
        # Drop hidden names that are absent from the dataframe.
        hidden_cols=[c for c in energy_hidden if c in LEADERBOARD_DF.columns],
        title="Energy",
    )
tab_index += 1
|
352 |
+
|
353 |
+
# 4. Reasoning
|
354 |
+
with gr.TabItem("🤔 Reasoning", id=tab_index):
    # Leaderboard restricted to the tasks registered under the "Reasoning"
    # area (empty task list when AREA_DEFINITIONS has no such key).
    reasoning_tasks = AREA_DEFINITIONS.get("Reasoning", [])
    reasoning_cols = [
        AutoEvalColumn.model_type_symbol.name,
        AutoEvalColumn.model.name,
        *(t.name for t in reasoning_tasks),
    ]

    # Hidden list, assembled in three parts: tasks of other areas, all
    # aggregate averages, then every AutoEvalColumn field not displayed.
    reasoning_other_tasks = [t.name for t in Tasks if t not in reasoning_tasks]
    reasoning_aggregates = [
        *AREA_AVG_COLUMN_MAP.values(),
        AutoEvalColumn.plue_avg.name,
        AutoEvalColumn.average.name,
    ]
    # The symbol and model columns are already in reasoning_cols, so the
    # membership test alone excludes them.
    reasoning_details = [
        c.name for c in fields(AutoEvalColumn) if c.name not in reasoning_cols
    ]
    reasoning_hidden = reasoning_other_tasks + reasoning_aggregates + reasoning_details

    create_leaderboard_component(
        LEADERBOARD_DF,
        displayed_cols=reasoning_cols,
        # Drop hidden names that are absent from the dataframe.
        hidden_cols=[c for c in reasoning_hidden if c in LEADERBOARD_DF.columns],
        title="Reasoning",
    )
tab_index += 1
|
364 |
+
|
365 |
# 5. Submit
|
366 |
+
with gr.TabItem("📤 Submit aqui!", id=tab_index):
|
367 |
with gr.Column():
|
368 |
with gr.Row():
|
369 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|