Ahmed Ahmed committed
Commit 25de5ef · Parent: c1fc4e2

consolidate

src/display/utils.py CHANGED
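In src/display/utils.py, the per-task arrow selection is dropped: since every tracked benchmark here is a perplexity metric, each score column is labeled with a fixed ⬇️ (lower is better).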
@@ -28,9 +28,8 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
-    # Add ⬆️ for metrics where higher is better, ⬇️ for metrics where lower is better
-    arrow = "⬇️" if task.value.benchmark == "perplexity" else "⬆️"
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(f"{task.value.col_name} {arrow}", "number", True)])
+    # All perplexity scores show with ⬇️ since lower is better
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(f"{task.value.col_name} ⬇️", "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
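For context, these appends presumably feed the leaderboard template's make_dataclass call, which is how the arrow becomes part of each column's display name. A minimal sketch, assuming the stock ColumnContent definition from the Hugging Face demo-leaderboard template (illustrative, not verbatim from this repo):

    from dataclasses import dataclass, make_dataclass

    @dataclass
    class ColumnContent:
        name: str          # header shown in the dataframe, e.g. "<col_name> ⬇️"
        type: str          # column dtype: "str", "number", "markdown", ...
        displayed_by_default: bool
        hidden: bool = False
        never_hidden: bool = False

    # Each [attr_name, type, instance] triple becomes a field with a default,
    # so AutoEvalColumn.<task>.name yields the display header, arrow included.
    AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

Under that assumption, whatever string goes into ColumnContent.name here is the exact dataframe column header every other module must key on.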
src/leaderboard/read_evals.py CHANGED
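In src/leaderboard/read_evals.py, EvalResult.to_dict is brought in line with those headers: the row dict gains defaults for columns the leaderboard expects but the result files don't carry (license, params, likes), and the per-task keys gain the same " ⬇️" suffix so they match the column names declared in utils.py.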
@@ -100,6 +100,10 @@ class EvalResult:
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            # Add missing columns with default values
+            AutoEvalColumn.license.name: "Unknown",  # Default license
+            AutoEvalColumn.params.name: 0,  # Default params
+            AutoEvalColumn.likes.name: 0,  # Default likes
         }
 
         for task in Tasks:
@@ -107,9 +111,9 @@ class EvalResult:
             if benchmark in self.results:
                 score = self.results[benchmark]
                 # Store original perplexity score (lower is better)
-                data_dict[task.value.col_name] = score
+                data_dict[f"{task.value.col_name} ⬇️"] = score
             else:
-                data_dict[task.value.col_name] = None
+                data_dict[f"{task.value.col_name} ⬇️"] = None
 
         return data_dict
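The key change matters because, before this commit, utils.py declared arrow-suffixed headers while to_dict wrote bare task.value.col_name keys, so the per-task dataframe columns never lined up. A small consistency check makes the invariant explicit; this is a hypothetical helper, not code from the repo, and the import path is an assumption:

    from src.display.utils import AutoEvalColumn, Tasks

    def check_row_keys(data_dict: dict) -> None:
        # Every task column declared in utils.py must appear in the row dict;
        # a bare col_name (missing " ⬇️") would surface as an all-empty column.
        for task in Tasks:
            expected = getattr(AutoEvalColumn, task.name).name  # ⬇️-suffixed header
            assert expected in data_dict, f"row is missing column {expected!r}"

Running a check like this on each EvalResult.to_dict() output would have caught the mismatch this commit fixes.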