Titova Ksenia committed on
Commit
758c9c5
·
1 Parent(s): 1077ec2

remove average_pb

Browse files
src/display/utils.py CHANGED
@@ -27,7 +27,7 @@ auto_eval_column_dict = []
27
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
28
  #Scores
29
  auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Correlation ⬆️", "number", True)])
30
- auto_eval_column_dict.append(["average_pb", ColumnContent, ColumnContent("Positional Bias Impact", "number", True)])
31
  for task in Tasks:
32
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
33
  # Model information
 
27
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
28
  #Scores
29
  auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Correlation ⬆️", "number", True)])
30
+ # auto_eval_column_dict.append(["average_pb", ColumnContent, ColumnContent("Positional Bias Impact", "number", True)])
31
  for task in Tasks:
32
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
33
  # Model information
src/leaderboard/read_evals.py CHANGED
@@ -114,11 +114,7 @@ class EvalResult:
114
  def to_dict(self, mina=0, maxa=1):
115
  """Converts the Eval Result to a dict compatible with our dataframe display"""
116
  average = sum([self.results["apcc"], self.results["mpcc"]]) / 2
117
- print("self.results mpcc_delta", self.results["mpcc_delta"])
118
 
119
- norm_mpcc_delta = (float(self.results["mpcc_delta"]) - mina) / (maxa - mina)
120
- print("norm_mpcc_delta", norm_mpcc_delta)
121
- average_pb = sum([norm_mpcc_delta, self.results["mpcc_cons"], self.results["pcon_ab"]]) / 3
122
  data_dict = {
123
  "eval_name": self.eval_name, # not a column, just a save name,
124
  AutoEvalColumn.precision.name: self.precision.value.name,
@@ -129,7 +125,6 @@ class EvalResult:
129
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
130
  AutoEvalColumn.revision.name: self.revision,
131
  AutoEvalColumn.average.name: average,
132
- AutoEvalColumn.average_pb.name: average_pb,
133
  AutoEvalColumn.license.name: self.license,
134
  AutoEvalColumn.likes.name: self.likes,
135
  AutoEvalColumn.params.name: self.num_params,
@@ -201,11 +196,9 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
201
  eval_results[eval_name] = eval_result
202
 
203
  results = []
204
- mina = min([a.results["mpcc_delta"] for a in eval_results.values()])
205
- maxa = max([a.results["mpcc_delta"] for a in eval_results.values()])
206
  for v in eval_results.values():
207
  try:
208
- v.to_dict(mina, maxa) # we test if the dict version is complete
209
  results.append(v)
210
  except KeyError as e: # not all eval values present
211
  print("e", e)
 
114
  def to_dict(self, mina=0, maxa=1):
115
  """Converts the Eval Result to a dict compatible with our dataframe display"""
116
  average = sum([self.results["apcc"], self.results["mpcc"]]) / 2
 
117
 
 
 
 
118
  data_dict = {
119
  "eval_name": self.eval_name, # not a column, just a save name,
120
  AutoEvalColumn.precision.name: self.precision.value.name,
 
125
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
126
  AutoEvalColumn.revision.name: self.revision,
127
  AutoEvalColumn.average.name: average,
 
128
  AutoEvalColumn.license.name: self.license,
129
  AutoEvalColumn.likes.name: self.likes,
130
  AutoEvalColumn.params.name: self.num_params,
 
196
  eval_results[eval_name] = eval_result
197
 
198
  results = []
 
 
199
  for v in eval_results.values():
200
  try:
201
+ v.to_dict() # we test if the dict version is complete
202
  results.append(v)
203
  except KeyError as e: # not all eval values present
204
  print("e", e)
src/populate.py CHANGED
@@ -11,9 +11,7 @@ from src.leaderboard.read_evals import get_raw_eval_results
11
  def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
12
  """Creates a dataframe from all the individual experiment results"""
13
  raw_data = get_raw_eval_results(results_path, requests_path)
14
- mina = min([a.results["mpcc_delta"] for a in raw_data.values()])
15
- maxa = max([a.results["mpcc_delta"] for a in raw_data.values()])
16
- all_data_json = [v.to_dict(mina, maxa) for v in raw_data]
17
 
18
  df = pd.DataFrame.from_records(all_data_json)
19
  df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
@@ -30,10 +28,10 @@ def set_style_for_leaderboard_df(df: pd.DataFrame) -> pd.DataFrame:
30
  # Adding CSS to style the specific column header
31
  styled_df.set_table_styles({
32
  AutoEvalColumn.average.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}],
33
- AutoEvalColumn.average_pb.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}]
34
  }, overwrite=False)
35
 
36
- styled_df.format(na_rep="").bar(align=0, subset=[AutoEvalColumn.average.name, AutoEvalColumn.average_pb.name], cmap="PiYG")
37
  return styled_df
38
 
39
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
 
11
  def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
12
  """Creates a dataframe from all the individual experiment results"""
13
  raw_data = get_raw_eval_results(results_path, requests_path)
14
+ all_data_json = [v.to_dict() for v in raw_data]
 
 
15
 
16
  df = pd.DataFrame.from_records(all_data_json)
17
  df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
 
28
  # Adding CSS to style the specific column header
29
  styled_df.set_table_styles({
30
  AutoEvalColumn.average.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}],
31
+ # AutoEvalColumn.average_pb.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}]
32
  }, overwrite=False)
33
 
34
+ styled_df.format(na_rep="").bar(align=0, subset=[AutoEvalColumn.average.name], cmap="PiYG")
35
  return styled_df
36
 
37
  def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]: