LucasLima committed on
Commit
8f26ebb
·
verified ·
1 Parent(s): 9d5c760

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -53
app.py CHANGED
@@ -256,20 +256,20 @@ with demo:
256
 
257
  # 1. Benchmark Geral
258
  with gr.TabItem("📊 Benchmark Geral", id=tab_index):
259
- # Colunas a exibir: T, Modelo, Média Geral, PLUE (remover outras médias)\
260
  general_cols_to_display = [
261
  AutoEvalColumn.model_type_symbol.name, # T
262
  AutoEvalColumn.model.name, # Modelo
263
  AutoEvalColumn.average.name, # Média Geral
264
  AutoEvalColumn.plue_avg.name, # Média PLUE
265
- # Remover médias individuais das áreas NÃO PLUE (agora todas estão em PLUE)
266
- # AutoEvalColumn.discurso_odio_avg.name,\
267
- # AutoEvalColumn.economia_contabilidade_avg.name,\
268
- # AutoEvalColumn.semantica_inferencia_avg.name,\
269
  ]
270
  general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
271
 
272
- # Colunas a ocultar: Todas as tasks + TODAS as médias de área individuais + detalhes
 
273
  general_hidden_cols = [task.name for task in Tasks] + \
274
  list(AREA_AVG_COLUMN_MAP.values()) + \
275
  [
@@ -291,69 +291,34 @@ with demo:
291
  )
292
  tab_index += 1
293
 
294
- # 2. PLUE
295
  with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
296
- # --- Lógica interna da aba PLUE (atualizada) ---
297
  gr.Markdown("## Selecione a visualização PLUE:")
298
- # ATUALIZAR choices com base na nova PLUE_GROUP_AREAS
299
- all_plue_options = [PLUE_GENERAL_VIEW_NAME] + sorted(PLUE_GROUP_AREAS) # Ordenar para consistência
300
  plue_dropdown = gr.Dropdown(
301
  choices=all_plue_options,
302
  label="Visualização PLUE",
303
  value=PLUE_GENERAL_VIEW_NAME
304
  )
305
 
306
- # Função auxiliar (ATUALIZAR lógica para PLUE_GENERAL_VIEW_NAME)
307
  def get_plue_leaderboard_config(selected_option):
308
  if selected_option == PLUE_GENERAL_VIEW_NAME:
309
- # Visão geral PLUE agora mostra TODAS as médias das áreas PLUE
310
- displayed_cols = [
311
- AutoEvalColumn.model_type_symbol.name,
312
- AutoEvalColumn.model.name,
313
- ] + [AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP]
314
- hidden_cols = [task.name for task in Tasks] + \
315
- [AutoEvalColumn.average.name] + \
316
- [
317
- AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
318
- AutoEvalColumn.model_type.name,
319
- AutoEvalColumn.architecture.name,
320
- AutoEvalColumn.weight_type.name,
321
- AutoEvalColumn.precision.name,
322
- AutoEvalColumn.license.name,
323
- AutoEvalColumn.params.name,
324
- AutoEvalColumn.likes.name,
325
- AutoEvalColumn.still_on_hub.name,
326
- AutoEvalColumn.revision.name
327
- ]
328
  title = PLUE_GENERAL_VIEW_NAME
329
  else:
330
- # Lógica para área específica (inalterada)
331
  selected_area = selected_option
332
  tasks_in_area = AREA_DEFINITIONS[selected_area]
333
- displayed_cols = [
334
- AutoEvalColumn.model_type_symbol.name,
335
- AutoEvalColumn.model.name,
336
- ] + [task.name for task in tasks_in_area]
337
- hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + \
338
- [task.name for task in Tasks if task not in tasks_in_area] + \
339
- [
340
- AutoEvalColumn.plue_avg.name, # Ocultar média PLUE agregada aqui
341
- AutoEvalColumn.average.name, # Ocultar média geral aqui também
342
- AutoEvalColumn.model_type.name,
343
- AutoEvalColumn.architecture.name,
344
- AutoEvalColumn.weight_type.name,
345
- AutoEvalColumn.precision.name,
346
- AutoEvalColumn.license.name,
347
- AutoEvalColumn.params.name,
348
- AutoEvalColumn.likes.name,
349
- AutoEvalColumn.still_on_hub.name,
350
- AutoEvalColumn.revision.name
351
- ]
352
  title = selected_area
353
  final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
354
  return displayed_cols, final_hidden_cols, title
355
 
356
- # Pré-renderização (ATUALIZAR loop com novas all_plue_options)
357
  plue_containers = {}
358
  for option in all_plue_options:
359
  displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
@@ -361,7 +326,7 @@ with demo:
361
  with gr.Group(visible=is_visible) as plue_containers[option]:
362
  create_leaderboard_component(LEADERBOARD_DF, displayed_cols=displayed_cols, hidden_cols=hidden_cols, title=title)
363
 
364
- # Função de callback (inalterada, mas opera sobre novas opções)
365
  def switch_plue_view(selected_option):
366
  update_list = []
367
  for option in all_plue_options:
@@ -373,8 +338,32 @@ with demo:
373
  # --- Fim Lógica PLUE ---
374
  tab_index += 1
375
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  # 5. Submit
377
- with gr.TabItem("✉️✨ Submeta seu modelo aqui!", id=tab_index):
378
  with gr.Column():
379
  with gr.Row():
380
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
256
 
257
  # 1. Benchmark Geral
258
  with gr.TabItem("📊 Benchmark Geral", id=tab_index):
259
+ # Colunas a exibir: T, Modelo, Média Geral, PLUE
260
  general_cols_to_display = [
261
  AutoEvalColumn.model_type_symbol.name, # T
262
  AutoEvalColumn.model.name, # Modelo
263
  AutoEvalColumn.average.name, # Média Geral
264
  AutoEvalColumn.plue_avg.name, # Média PLUE
265
+ # Adicionar médias Energy/Reasoning se desejado (opcional)
266
+ # AutoEvalColumn.energy_avg.name,
267
+ # AutoEvalColumn.reasoning_avg.name,
 
268
  ]
269
  general_cols_to_display = [col for col in general_cols_to_display if col in LEADERBOARD_DF.columns]
270
 
271
+ # Colunas a ocultar: Tasks + Médias de Área Individuais (PLUE e não-PLUE) + detalhes
272
+ # (Garantir que Energy/Reasoning avg estão aqui se não forem exibidas)
273
  general_hidden_cols = [task.name for task in Tasks] + \
274
  list(AREA_AVG_COLUMN_MAP.values()) + \
275
  [
 
291
  )
292
  tab_index += 1
293
 
294
+ # 2. PLUE (Agora apenas com as áreas originais + 3 adicionadas)
295
  with gr.TabItem("📚 PLUE", id=tab_index) as plue_tab:
296
+ # --- Lógica interna da aba PLUE (ajustada) ---
297
  gr.Markdown("## Selecione a visualização PLUE:")
298
+ # RECALCULAR choices e options com base na PLUE_GROUP_AREAS atualizada (sem Energy/Reasoning)
299
+ all_plue_options = [PLUE_GENERAL_VIEW_NAME] + sorted(PLUE_GROUP_AREAS)
300
  plue_dropdown = gr.Dropdown(
301
  choices=all_plue_options,
302
  label="Visualização PLUE",
303
  value=PLUE_GENERAL_VIEW_NAME
304
  )
305
 
306
+ # Função auxiliar (lógica interna não muda, mas opera sobre PLUE_GROUP_AREAS atualizada)
307
  def get_plue_leaderboard_config(selected_option):
308
  if selected_option == PLUE_GENERAL_VIEW_NAME:
309
+ displayed_cols = [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name,] + [AREA_AVG_COLUMN_MAP[area] for area in PLUE_GROUP_AREAS if area in AREA_AVG_COLUMN_MAP]
310
+ hidden_cols = [task.name for task in Tasks] + [avg_col for area, avg_col in AREA_AVG_COLUMN_MAP.items() if area not in PLUE_GROUP_AREAS] + [AutoEvalColumn.average.name] + [AutoEvalColumn.plue_avg.name, AutoEvalColumn.model_type.name, AutoEvalColumn.architecture.name, AutoEvalColumn.weight_type.name, AutoEvalColumn.precision.name, AutoEvalColumn.license.name, AutoEvalColumn.params.name, AutoEvalColumn.likes.name, AutoEvalColumn.still_on_hub.name, AutoEvalColumn.revision.name]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  title = PLUE_GENERAL_VIEW_NAME
312
  else:
 
313
  selected_area = selected_option
314
  tasks_in_area = AREA_DEFINITIONS[selected_area]
315
+ displayed_cols = [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name,] + [task.name for task in tasks_in_area]
316
+ hidden_cols = list(AREA_AVG_COLUMN_MAP.values()) + [task.name for task in Tasks if task not in tasks_in_area] + [AutoEvalColumn.plue_avg.name, AutoEvalColumn.average.name, AutoEvalColumn.model_type.name, AutoEvalColumn.architecture.name, AutoEvalColumn.weight_type.name, AutoEvalColumn.precision.name, AutoEvalColumn.license.name, AutoEvalColumn.params.name, AutoEvalColumn.likes.name, AutoEvalColumn.still_on_hub.name, AutoEvalColumn.revision.name]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  title = selected_area
318
  final_hidden_cols = [col for col in hidden_cols if col in LEADERBOARD_DF.columns]
319
  return displayed_cols, final_hidden_cols, title
320
 
321
+ # Pré-renderização (ATUALIZAR loop e containers com novas all_plue_options)
322
  plue_containers = {}
323
  for option in all_plue_options:
324
  displayed_cols, hidden_cols, title = get_plue_leaderboard_config(option)
 
326
  with gr.Group(visible=is_visible) as plue_containers[option]:
327
  create_leaderboard_component(LEADERBOARD_DF, displayed_cols=displayed_cols, hidden_cols=hidden_cols, title=title)
328
 
329
+ # Função de callback (ATUALIZAR loop com novas all_plue_options)
330
  def switch_plue_view(selected_option):
331
  update_list = []
332
  for option in all_plue_options:
 
338
  # --- Fim Lógica PLUE ---
339
  tab_index += 1
340
 
341
+ # 3. Energy
342
+ with gr.TabItem("⚡️ Energy", id=tab_index):
343
+ # Exibir leaderboard com dados de Energy
344
+ energy_tasks = AREA_DEFINITIONS.get("Energy", [])
345
+ energy_cols = [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name] + [t.name for t in energy_tasks]
346
+ energy_hidden = [t.name for t in Tasks if t not in energy_tasks] + \
347
+ list(AREA_AVG_COLUMN_MAP.values()) + \
348
+ [AutoEvalColumn.plue_avg.name, AutoEvalColumn.average.name] + \
349
+ [c.name for c in fields(AutoEvalColumn) if c.name not in energy_cols and c.name != AutoEvalColumn.model_type_symbol.name and c.name != AutoEvalColumn.model.name ] # Detalhes
350
+ create_leaderboard_component(LEADERBOARD_DF, displayed_cols=energy_cols, hidden_cols=[c for c in energy_hidden if c in LEADERBOARD_DF.columns], title="Energy")
351
+ tab_index += 1
352
+
353
+ # 4. Reasoning
354
+ with gr.TabItem("🤔 Reasoning", id=tab_index):
355
+ # Exibir leaderboard com dados de Reasoning
356
+ reasoning_tasks = AREA_DEFINITIONS.get("Reasoning", [])
357
+ reasoning_cols = [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name] + [t.name for t in reasoning_tasks]
358
+ reasoning_hidden = [t.name for t in Tasks if t not in reasoning_tasks] + \
359
+ list(AREA_AVG_COLUMN_MAP.values()) + \
360
+ [AutoEvalColumn.plue_avg.name, AutoEvalColumn.average.name] + \
361
+ [c.name for c in fields(AutoEvalColumn) if c.name not in reasoning_cols and c.name != AutoEvalColumn.model_type_symbol.name and c.name != AutoEvalColumn.model.name ] # Detalhes
362
+ create_leaderboard_component(LEADERBOARD_DF, displayed_cols=reasoning_cols, hidden_cols=[c for c in reasoning_hidden if c in LEADERBOARD_DF.columns], title="Reasoning")
363
+ tab_index += 1
364
+
365
  # 5. Submit
366
+ with gr.TabItem("📤 Submit aqui!", id=tab_index):
367
  with gr.Column():
368
  with gr.Row():
369
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")