Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -251,175 +251,140 @@ with demo:
|
|
251 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
252 |
|
253 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
254 |
-
|
255 |
-
|
|
|
|
|
|
|
|
|
256 |
general_cols_to_display = [
|
257 |
AutoEvalColumn.model_type_symbol.name, # T
|
258 |
AutoEvalColumn.model.name, # Modelo
|
259 |
AutoEvalColumn.average.name, # Média Geral
|
260 |
AutoEvalColumn.plue_avg.name, # Média PLUE
|
261 |
-
#
|
262 |
-
AutoEvalColumn.discurso_odio_avg.name
|
263 |
-
AutoEvalColumn.economia_contabilidade_avg.name
|
264 |
-
AutoEvalColumn.semantica_inferencia_avg.name
|
265 |
]
|
266 |
-
# Garantir que só incluimos colunas que existem no DF
|
267 |
general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
|
268 |
-
|
269 |
-
|
270 |
-
] +
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
|
|
282 |
create_leaderboard_component(
|
283 |
LEADERBOARD_DF,
|
284 |
displayed_cols=general_cols_to_display,
|
285 |
hidden_cols=[col for col in general_hidden_cols if col in LEADERBOARD_DF.columns],
|
286 |
title="Benchmark Geral"
|
287 |
)
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
AutoEvalColumn.model_type_symbol.name,
|
306 |
AutoEvalColumn.model.name,
|
307 |
] + [task.name for task in tasks_in_area]
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
AutoEvalColumn.license.name,
|
360 |
-
AutoEvalColumn.params.name,
|
361 |
-
AutoEvalColumn.likes.name,
|
362 |
-
AutoEvalColumn.still_on_hub.name,
|
363 |
-
AutoEvalColumn.revision.name
|
364 |
-
]
|
365 |
-
title = PLUE_GENERAL_VIEW_NAME
|
366 |
-
else:
|
367 |
-
selected_area = selected_option
|
368 |
-
tasks_in_area = AREA_DEFINITIONS[selected_area]
|
369 |
-
displayed_cols = [
|
370 |
-
AutoEvalColumn.model_type_symbol.name,
|
371 |
-
AutoEvalColumn.model.name,
|
372 |
-
] + [task.name for task in tasks_in_area]
|
373 |
-
hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + [
|
374 |
-
task.name for task in Tasks if task not in tasks_in_area
|
375 |
-
] + [
|
376 |
-
AutoEvalColumn.model_type.name,
|
377 |
-
AutoEvalColumn.architecture.name,
|
378 |
-
AutoEvalColumn.weight_type.name,
|
379 |
-
AutoEvalColumn.precision.name,
|
380 |
-
AutoEvalColumn.license.name,
|
381 |
-
AutoEvalColumn.params.name,
|
382 |
-
AutoEvalColumn.likes.name,
|
383 |
-
AutoEvalColumn.still_on_hub.name,
|
384 |
-
AutoEvalColumn.revision.name
|
385 |
-
]
|
386 |
-
title = selected_area
|
387 |
-
final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
|
388 |
-
return displayed_cols, final_hidden_cols, title
|
389 |
-
|
390 |
-
# Pré-renderização dos Groups e Leaderboards
|
391 |
-
plue_containers = {}
|
392 |
-
all_plue_options = [PLUE_GENERAL_VIEW_NAME] + PLUE_GROUP_AREAS
|
393 |
-
for option in all_plue_options:
|
394 |
-
displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
|
395 |
-
is_visible = (option == PLUE_GENERAL_VIEW_NAME)
|
396 |
-
with gr.Group(visible=is_visible) as plue_containers[option]:
|
397 |
-
create_leaderboard_component(\
|
398 |
-
LEADERBOARD_DF, \
|
399 |
-
displayed_cols=displayed_cols,\
|
400 |
-
hidden_cols=hidden_cols, \
|
401 |
-
title=title\
|
402 |
-
)\
|
403 |
-
|
404 |
-
# Função de callback para visibilidade
|
405 |
-
def switch_plue_view(selected_option):
|
406 |
-
update_list = []
|
407 |
-
for option in all_plue_options:
|
408 |
-
update_list.append(gr.update(visible=(option == selected_option)))
|
409 |
-
return update_list
|
410 |
-
|
411 |
-
# Evento change
|
412 |
-
plue_dropdown.change(\
|
413 |
-
fn=switch_plue_view,\
|
414 |
-
inputs=[plue_dropdown],
|
415 |
-
outputs=[plue_containers[option] for option in all_plue_options] \
|
416 |
-
)\
|
417 |
-
# >>> FIM LÓGICA INTERNA PLUE <<<
|
418 |
-
|
419 |
-
tab_index += 1 # Incrementar após criar a aba PLUE
|
420 |
-
# Se a área é PLUE mas a aba já foi criada, não faz nada neste loop
|
421 |
-
|
422 |
-
with gr.TabItem("📤 Submeta aqui!", id=tab_index):
|
423 |
with gr.Column():
|
424 |
with gr.Row():
|
425 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
@@ -460,7 +425,7 @@ with demo:
|
|
460 |
row_count=5,
|
461 |
)
|
462 |
with gr.Row():
|
463 |
-
gr.Markdown("#
|
464 |
|
465 |
with gr.Row():
|
466 |
with gr.Column():
|
|
|
251 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
252 |
|
253 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
254 |
+
# --- Definir Ordem das Abas ---
|
255 |
+
tab_index = 0
|
256 |
+
|
257 |
+
# 1. Benchmark Geral
|
258 |
+
with gr.TabItem("📊 Benchmark Geral", id=tab_index):
|
259 |
+
# Colunas a exibir: T, Modelo, Média Geral, PLUE (remover outras médias)
|
260 |
general_cols_to_display = [
|
261 |
AutoEvalColumn.model_type_symbol.name, # T
|
262 |
AutoEvalColumn.model.name, # Modelo
|
263 |
AutoEvalColumn.average.name, # Média Geral
|
264 |
AutoEvalColumn.plue_avg.name, # Média PLUE
|
265 |
+
# Remover médias individuais das áreas NÃO PLUE (agora todas estão em PLUE)
|
266 |
+
# AutoEvalColumn.discurso_odio_avg.name,
|
267 |
+
# AutoEvalColumn.economia_contabilidade_avg.name,
|
268 |
+
# AutoEvalColumn.semantica_inferencia_avg.name,
|
269 |
]
|
|
|
270 |
general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
|
271 |
+
|
272 |
+
# Colunas a ocultar: Todas as tasks + TODAS as médias de área individuais + detalhes
|
273 |
+
general_hidden_cols = [task.name for task in Tasks] + \
|
274 |
+
list(AREA_AVG_COLUMN_MAP.values()) + \
|
275 |
+
[
|
276 |
+
AutoEvalColumn.model_type.name,
|
277 |
+
AutoEvalColumn.architecture.name,
|
278 |
+
AutoEvalColumn.weight_type.name,
|
279 |
+
AutoEvalColumn.precision.name,
|
280 |
+
AutoEvalColumn.license.name,
|
281 |
+
AutoEvalColumn.params.name,
|
282 |
+
AutoEvalColumn.likes.name,
|
283 |
+
AutoEvalColumn.still_on_hub.name,
|
284 |
+
AutoEvalColumn.revision.name
|
285 |
+
]
|
286 |
create_leaderboard_component(
|
287 |
LEADERBOARD_DF,
|
288 |
displayed_cols=general_cols_to_display,
|
289 |
hidden_cols=[col for col in general_hidden_cols if col in LEADERBOARD_DF.columns],
|
290 |
title="Benchmark Geral"
|
291 |
)
|
292 |
+
tab_index += 1
|
293 |
+
|
294 |
+
# 2. PLUE
|
295 |
+
with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
|
296 |
+
# --- Lógica interna da aba PLUE (atualizada) ---
|
297 |
+
gr.Markdown("## Selecione a visualização PLUE:")
|
298 |
+
# ATUALIZAR choices com base na nova PLUE_GROUP_AREAS
|
299 |
+
all_plue_options = [PLUE_GENERAL_VIEW_NAME] + sorted(PLUE_GROUP_AREAS) # Ordenar para consistência
|
300 |
+
plue_dropdown = gr.Dropdown(
|
301 |
+
choices=all_plue_options,
|
302 |
+
label="Visualização PLUE",
|
303 |
+
value=PLUE_GENERAL_VIEW_NAME
|
304 |
+
)
|
305 |
+
|
306 |
+
# Função auxiliar (ATUALIZAR lógica para PLUE_GENERAL_VIEW_NAME)
|
307 |
+
def get_plue_leaderboard_config(selected_option):
|
308 |
+
if selected_option == PLUE_GENERAL_VIEW_NAME:
|
309 |
+
# Visão geral PLUE agora mostra TODAS as médias das áreas PLUE
|
310 |
+
displayed_cols = [
|
311 |
+
AutoEvalColumn.model_type_symbol.name,
|
312 |
+
AutoEvalColumn.model.name,
|
313 |
+
] + [AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP]
|
314 |
+
hidden_cols = [task.name for task in Tasks] + \
|
315 |
+
[AutoEvalColumn.average.name] + \
|
316 |
+
[
|
317 |
+
AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
|
318 |
+
AutoEvalColumn.model_type.name,
|
319 |
+
AutoEvalColumn.architecture.name,
|
320 |
+
AutoEvalColumn.weight_type.name,
|
321 |
+
AutoEvalColumn.precision.name,
|
322 |
+
AutoEvalColumn.license.name,
|
323 |
+
AutoEvalColumn.params.name,
|
324 |
+
AutoEvalColumn.likes.name,
|
325 |
+
AutoEvalColumn.still_on_hub.name,
|
326 |
+
AutoEvalColumn.revision.name
|
327 |
+
]
|
328 |
+
title = PLUE_GENERAL_VIEW_NAME
|
329 |
+
else:
|
330 |
+
# Lógica para área específica (inalterada)
|
331 |
+
selected_area = selected_option
|
332 |
+
tasks_in_area = AREA_DEFINITIONS[selected_area]
|
333 |
+
displayed_cols = [
|
334 |
AutoEvalColumn.model_type_symbol.name,
|
335 |
AutoEvalColumn.model.name,
|
336 |
] + [task.name for task in tasks_in_area]
|
337 |
+
hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + \
|
338 |
+
[task.name for task in Tasks if task not in tasks_in_area] + \
|
339 |
+
[
|
340 |
+
AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
|
341 |
+
AutoEvalColumn.average.name, # Ocultar média geral aqui também
|
342 |
+
AutoEvalColumn.model_type.name,
|
343 |
+
AutoEvalColumn.architecture.name,
|
344 |
+
AutoEvalColumn.weight_type.name,
|
345 |
+
AutoEvalColumn.precision.name,
|
346 |
+
AutoEvalColumn.license.name,
|
347 |
+
AutoEvalColumn.params.name,
|
348 |
+
AutoEvalColumn.likes.name,
|
349 |
+
AutoEvalColumn.still_on_hub.name,
|
350 |
+
AutoEvalColumn.revision.name
|
351 |
+
]
|
352 |
+
title = selected_area
|
353 |
+
final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
|
354 |
+
return displayed_cols, final_hidden_cols, title
|
355 |
+
|
356 |
+
# Pré-renderização (ATUALIZAR loop com novas all_plue_options)
|
357 |
+
plue_containers = {}
|
358 |
+
for option in all_plue_options:
|
359 |
+
displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
|
360 |
+
is_visible = (option == PLUE_GENERAL_VIEW_NAME)
|
361 |
+
with gr.Group(visible=is_visible) as plue_containers[option]:
|
362 |
+
create_leaderboard_component(LEADERBOARD_DF, displayed_cols=displayed_cols, hidden_cols=hidden_cols, title=title)
|
363 |
+
|
364 |
+
# Função de callback (inalterada, mas opera sobre novas opções)
|
365 |
+
def switch_plue_view(selected_option):
|
366 |
+
update_list = []
|
367 |
+
for option in all_plue_options:
|
368 |
+
update_list.append(gr.update(visible=(option == selected_option)))
|
369 |
+
return update_list
|
370 |
+
|
371 |
+
# Evento change (ATUALIZAR outputs com novas all_plue_options)
|
372 |
+
plue_dropdown.change(fn=switch_plue_view, inputs=[plue_dropdown], outputs=[plue_containers[option] for option in all_plue_options])
|
373 |
+
# --- Fim Lógica PLUE ---
|
374 |
+
tab_index += 1
|
375 |
+
|
376 |
+
# 3. Energy
|
377 |
+
with gr.TabItem("⚡️ Energy", id=tab_index):
|
378 |
+
create_leaderboard_component(pd.DataFrame(), [], title="Energy")
|
379 |
+
tab_index += 1
|
380 |
+
|
381 |
+
# 4. Reasoning
|
382 |
+
with gr.TabItem("🤔 Reasoning", id=tab_index):
|
383 |
+
create_leaderboard_component(pd.DataFrame(), [], title="Reasoning")
|
384 |
+
tab_index += 1
|
385 |
+
|
386 |
+
# 5. Submit
|
387 |
+
with gr.TabItem("📤 Submit aqui!", id=tab_index):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
with gr.Column():
|
389 |
with gr.Row():
|
390 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
|
|
425 |
row_count=5,
|
426 |
)
|
427 |
with gr.Row():
|
428 |
+
gr.Markdown("# ✉️✨ Submeta seu modelo aqui!", elem_classes="markdown-text")
|
429 |
|
430 |
with gr.Row():
|
431 |
with gr.Column():
|