LucasLima committed on
Commit
90b6b3b
·
verified ·
1 Parent(s): 3e8881f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -154
app.py CHANGED
@@ -251,175 +251,140 @@ with demo:
251
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
252
 
253
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
254
- with gr.TabItem("📊 Benchmark Geral", id=0):
255
- # Colunas a exibir por padrão na aba geral (Nova Ordem)
 
 
 
 
256
  general_cols_to_display = [
257
  AutoEvalColumn.model_type_symbol.name, # T
258
  AutoEvalColumn.model.name, # Modelo
259
  AutoEvalColumn.average.name, # Média Geral
260
  AutoEvalColumn.plue_avg.name, # Média PLUE
261
- # Médias das áreas NÃO PLUE
262
- AutoEvalColumn.discurso_odio_avg.name,
263
- AutoEvalColumn.economia_contabilidade_avg.name,
264
- AutoEvalColumn.semantica_inferencia_avg.name,
265
  ]
266
- # Garantir que só incluimos colunas que existem no DF
267
  general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
268
- general_hidden_cols = [task.name for task in Tasks] + [
269
- AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP
270
- ] + [
271
- AutoEvalColumn.model_type.name,
272
- AutoEvalColumn.architecture.name,
273
- AutoEvalColumn.weight_type.name,
274
- AutoEvalColumn.precision.name,
275
- AutoEvalColumn.license.name,
276
- AutoEvalColumn.params.name,
277
- AutoEvalColumn.likes.name,
278
- AutoEvalColumn.still_on_hub.name,
279
- AutoEvalColumn.revision.name
280
- ]
281
-
 
282
  create_leaderboard_component(
283
  LEADERBOARD_DF,
284
  displayed_cols=general_cols_to_display,
285
  hidden_cols=[col for col in general_hidden_cols if col in LEADERBOARD_DF.columns],
286
  title="Benchmark Geral"
287
  )
288
-
289
- # Áreas fora do grupo PLUE e criação da aba PLUE
290
- tab_index = 1
291
- plue_tab_created = False
292
- # Remover dicionário plue_containers daqui, ele será criado dentro da aba PLUE
293
- # plue_containers = {}
294
- # Remover lista all_plue_options daqui
295
- # all_plue_options = [PLUE_GENERAL_VIEW_NAME] + PLUE_GROUP_AREAS
296
-
297
- # Loop principal para criar as abas
298
- all_area_names = sorted(AREA_DEFINITIONS.keys())
299
- for area_name in all_area_names:
300
- if area_name not in PLUE_GROUP_AREAS:
301
- # >>> RESTAURAR CRIAÇÃO DAS ABAS NÃO-PLUE <<<
302
- with gr.TabItem(f"🎓 {area_name}", id=tab_index):
303
- tasks_in_area = AREA_DEFINITIONS[area_name]
304
- area_cols_to_display = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  AutoEvalColumn.model_type_symbol.name,
306
  AutoEvalColumn.model.name,
307
  ] + [task.name for task in tasks_in_area]
308
-
309
- area_hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + [
310
- task.name for task in Tasks if task not in tasks_in_area
311
- ] + [
312
- # Manter outras colunas ocultas como na aba geral
313
- AutoEvalColumn.model_type.name,
314
- AutoEvalColumn.architecture.name,
315
- AutoEvalColumn.weight_type.name,
316
- AutoEvalColumn.precision.name,
317
- AutoEvalColumn.license.name,
318
- AutoEvalColumn.params.name,
319
- AutoEvalColumn.likes.name,
320
- AutoEvalColumn.still_on_hub.name,
321
- AutoEvalColumn.revision.name
322
- ]
323
-
324
- create_leaderboard_component(
325
- LEADERBOARD_DF,
326
- displayed_cols=area_cols_to_display,
327
- hidden_cols=[col for col in area_hidden_cols if col in LEADERBOARD_DF.columns], # Ajuste se média geral deve ser oculta
328
- title=area_name
329
- )
330
- # >>> FIM RESTAURAR <<<
331
- tab_index += 1
332
- elif not plue_tab_created: # Criar a aba PLUE apenas uma vez
333
- with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
334
- plue_tab_created = True # Marcar como criada
335
- # >>> Lógica interna da aba PLUE <<<
336
- gr.Markdown("## Selecione a visualização PLUE:")
337
- plue_dropdown = gr.Dropdown(
338
- choices=[PLUE_GENERAL_VIEW_NAME] + PLUE_GROUP_AREAS,
339
- label="Visualização PLUE",
340
- value=PLUE_GENERAL_VIEW_NAME
341
- )
342
-
343
- # Função auxiliar
344
- def get_plue_leaderboard_config(selected_option):
345
- if selected_option == PLUE_GENERAL_VIEW_NAME:
346
- displayed_cols = [
347
- AutoEvalColumn.model_type_symbol.name,
348
- AutoEvalColumn.model.name,
349
- ] + [AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP]
350
- hidden_cols = [task.name for task in Tasks] + [
351
- avg_col for area, avg_col in AREA_AVG_COLUMN_MAP.items() if area not in PLUE_GROUP_AREAS
352
- ] + [
353
- AutoEvalColumn.average.name
354
- ] + [
355
- AutoEvalColumn.model_type.name,
356
- AutoEvalColumn.architecture.name,
357
- AutoEvalColumn.weight_type.name,
358
- AutoEvalColumn.precision.name,
359
- AutoEvalColumn.license.name,
360
- AutoEvalColumn.params.name,
361
- AutoEvalColumn.likes.name,
362
- AutoEvalColumn.still_on_hub.name,
363
- AutoEvalColumn.revision.name
364
- ]
365
- title = PLUE_GENERAL_VIEW_NAME
366
- else:
367
- selected_area = selected_option
368
- tasks_in_area = AREA_DEFINITIONS[selected_area]
369
- displayed_cols = [
370
- AutoEvalColumn.model_type_symbol.name,
371
- AutoEvalColumn.model.name,
372
- ] + [task.name for task in tasks_in_area]
373
- hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + [
374
- task.name for task in Tasks if task not in tasks_in_area
375
- ] + [
376
- AutoEvalColumn.model_type.name,
377
- AutoEvalColumn.architecture.name,
378
- AutoEvalColumn.weight_type.name,
379
- AutoEvalColumn.precision.name,
380
- AutoEvalColumn.license.name,
381
- AutoEvalColumn.params.name,
382
- AutoEvalColumn.likes.name,
383
- AutoEvalColumn.still_on_hub.name,
384
- AutoEvalColumn.revision.name
385
- ]
386
- title = selected_area
387
- final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
388
- return displayed_cols, final_hidden_cols, title
389
-
390
- # Pré-renderização dos Groups e Leaderboards
391
- plue_containers = {}
392
- all_plue_options = [PLUE_GENERAL_VIEW_NAME] + PLUE_GROUP_AREAS
393
- for option in all_plue_options:
394
- displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
395
- is_visible = (option == PLUE_GENERAL_VIEW_NAME)
396
- with gr.Group(visible=is_visible) as plue_containers[option]:
397
- create_leaderboard_component(\
398
- LEADERBOARD_DF, \
399
- displayed_cols=displayed_cols,\
400
- hidden_cols=hidden_cols, \
401
- title=title\
402
- )\
403
-
404
- # Função de callback para visibilidade
405
- def switch_plue_view(selected_option):
406
- update_list = []
407
- for option in all_plue_options:
408
- update_list.append(gr.update(visible=(option == selected_option)))
409
- return update_list
410
-
411
- # Evento change
412
- plue_dropdown.change(\
413
- fn=switch_plue_view,\
414
- inputs=[plue_dropdown],
415
- outputs=[plue_containers[option] for option in all_plue_options] \
416
- )\
417
- # >>> FIM LÓGICA INTERNA PLUE <<<
418
-
419
- tab_index += 1 # Incrementar após criar a aba PLUE
420
- # Se a área é PLUE mas a aba já foi criada, não faz nada neste loop
421
-
422
- with gr.TabItem("📤 Submeta aqui!", id=tab_index):
423
  with gr.Column():
424
  with gr.Row():
425
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
@@ -460,7 +425,7 @@ with demo:
460
  row_count=5,
461
  )
462
  with gr.Row():
463
- gr.Markdown("# ✉✨ Submeta seu modelo aqui!", elem_classes="markdown-text")
464
 
465
  with gr.Row():
466
  with gr.Column():
 
251
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
252
 
253
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
254
+ # --- Definir Ordem das Abas ---
255
+ tab_index = 0
256
+
257
+ # 1. Benchmark Geral
258
+ with gr.TabItem("📊 Benchmark Geral", id=tab_index):
259
+ # Colunas a exibir: T, Modelo, Média Geral, PLUE (remover outras médias)
260
  general_cols_to_display = [
261
  AutoEvalColumn.model_type_symbol.name, # T
262
  AutoEvalColumn.model.name, # Modelo
263
  AutoEvalColumn.average.name, # Média Geral
264
  AutoEvalColumn.plue_avg.name, # Média PLUE
265
+ # Remover médias individuais das áreas NÃO PLUE (agora todas estão em PLUE)
266
+ # AutoEvalColumn.discurso_odio_avg.name,\
267
+ # AutoEvalColumn.economia_contabilidade_avg.name,\
268
+ # AutoEvalColumn.semantica_inferencia_avg.name,\
269
  ]
 
270
  general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
271
+
272
+ # Colunas a ocultar: Todas as tasks + TODAS as médias de área individuais + detalhes
273
+ general_hidden_cols = [task.name for task in Tasks] + \
274
+ list(AREA_AVG_COLUMN_MAP.values()) + \
275
+ [
276
+ AutoEvalColumn.model_type.name,
277
+ AutoEvalColumn.architecture.name,
278
+ AutoEvalColumn.weight_type.name,
279
+ AutoEvalColumn.precision.name,
280
+ AutoEvalColumn.license.name,
281
+ AutoEvalColumn.params.name,
282
+ AutoEvalColumn.likes.name,
283
+ AutoEvalColumn.still_on_hub.name,
284
+ AutoEvalColumn.revision.name
285
+ ]
286
  create_leaderboard_component(
287
  LEADERBOARD_DF,
288
  displayed_cols=general_cols_to_display,
289
  hidden_cols=[col for col in general_hidden_cols if col in LEADERBOARD_DF.columns],
290
  title="Benchmark Geral"
291
  )
292
+ tab_index += 1
293
+
294
+ # 2. PLUE
295
+ with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
296
+ # --- Lógica interna da aba PLUE (atualizada) ---
297
+ gr.Markdown("## Selecione a visualização PLUE:")
298
+ # ATUALIZAR choices com base na nova PLUE_GROUP_AREAS
299
+ all_plue_options = [PLUE_GENERAL_VIEW_NAME] + sorted(PLUE_GROUP_AREAS) # Ordenar para consistência
300
+ plue_dropdown = gr.Dropdown(
301
+ choices=all_plue_options,
302
+ label="Visualização PLUE",
303
+ value=PLUE_GENERAL_VIEW_NAME
304
+ )
305
+
306
+ # Função auxiliar (ATUALIZAR lógica para PLUE_GENERAL_VIEW_NAME)
307
+ def get_plue_leaderboard_config(selected_option):
308
+ if selected_option == PLUE_GENERAL_VIEW_NAME:
309
+ # Visão geral PLUE agora mostra TODAS as médias das áreas PLUE
310
+ displayed_cols = [
311
+ AutoEvalColumn.model_type_symbol.name,
312
+ AutoEvalColumn.model.name,
313
+ ] + [AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP]
314
+ hidden_cols = [task.name for task in Tasks] + \
315
+ [AutoEvalColumn.average.name] + \
316
+ [
317
+ AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
318
+ AutoEvalColumn.model_type.name,
319
+ AutoEvalColumn.architecture.name,
320
+ AutoEvalColumn.weight_type.name,
321
+ AutoEvalColumn.precision.name,
322
+ AutoEvalColumn.license.name,
323
+ AutoEvalColumn.params.name,
324
+ AutoEvalColumn.likes.name,
325
+ AutoEvalColumn.still_on_hub.name,
326
+ AutoEvalColumn.revision.name
327
+ ]
328
+ title = PLUE_GENERAL_VIEW_NAME
329
+ else:
330
+ # Lógica para área específica (inalterada)
331
+ selected_area = selected_option
332
+ tasks_in_area = AREA_DEFINITIONS[selected_area]
333
+ displayed_cols = [
334
  AutoEvalColumn.model_type_symbol.name,
335
  AutoEvalColumn.model.name,
336
  ] + [task.name for task in tasks_in_area]
337
+ hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + \
338
+ [task.name for task in Tasks if task not in tasks_in_area] + \
339
+ [
340
+ AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
341
+ AutoEvalColumn.average.name, # Ocultar média geral aqui também
342
+ AutoEvalColumn.model_type.name,
343
+ AutoEvalColumn.architecture.name,
344
+ AutoEvalColumn.weight_type.name,
345
+ AutoEvalColumn.precision.name,
346
+ AutoEvalColumn.license.name,
347
+ AutoEvalColumn.params.name,
348
+ AutoEvalColumn.likes.name,
349
+ AutoEvalColumn.still_on_hub.name,
350
+ AutoEvalColumn.revision.name
351
+ ]
352
+ title = selected_area
353
+ final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
354
+ return displayed_cols, final_hidden_cols, title
355
+
356
+ # Pré-renderização (ATUALIZAR loop com novas all_plue_options)
357
+ plue_containers = {}
358
+ for option in all_plue_options:
359
+ displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
360
+ is_visible = (option == PLUE_GENERAL_VIEW_NAME)
361
+ with gr.Group(visible=is_visible) as plue_containers[option]:
362
+ create_leaderboard_component(LEADERBOARD_DF, displayed_cols=displayed_cols, hidden_cols=hidden_cols, title=title)
363
+
364
+ # Função de callback (inalterada, mas opera sobre novas opções)
365
+ def switch_plue_view(selected_option):
366
+ update_list = []
367
+ for option in all_plue_options:
368
+ update_list.append(gr.update(visible=(option == selected_option)))
369
+ return update_list
370
+
371
+ # Evento change (ATUALIZAR outputs com novas all_plue_options)
372
+ plue_dropdown.change(fn=switch_plue_view, inputs=[plue_dropdown], outputs=[plue_containers[option] for option in all_plue_options])
373
+ # --- Fim Lógica PLUE ---
374
+ tab_index += 1
375
+
376
+ # 3. Energy
377
+ with gr.TabItem("⚡️ Energy", id=tab_index):
378
+ create_leaderboard_component(pd.DataFrame(), [], title="Energy")
379
+ tab_index += 1
380
+
381
+ # 4. Reasoning
382
+ with gr.TabItem("🤔 Reasoning", id=tab_index):
383
+ create_leaderboard_component(pd.DataFrame(), [], title="Reasoning")
384
+ tab_index += 1
385
+
386
+ # 5. Submit
387
+ with gr.TabItem("📤 Submit aqui!", id=tab_index):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  with gr.Column():
389
  with gr.Row():
390
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
425
  row_count=5,
426
  )
427
  with gr.Row():
428
+ gr.Markdown("# ✉️✨ Submeta seu modelo aqui!", elem_classes="markdown-text")
429
 
430
  with gr.Row():
431
  with gr.Column():