Update space
Browse files
- src/display/utils.py +1 -1
- src/leaderboard/read_evals.py +6 -4
- src/populate.py +1 -1
src/display/utils.py
CHANGED
|
@@ -59,7 +59,7 @@ for domain in Domains:
|
|
| 59 |
|
| 60 |
auto_eval_column_dict.append(["organization", ColumnContent, field(default_factory=lambda: ColumnContent("Organization", "str", False))])
|
| 61 |
auto_eval_column_dict.append(["knowledge_cutoff", ColumnContent, field(default_factory=lambda: ColumnContent("Knowledge cutoff", "str", False))])
|
| 62 |
-
|
| 63 |
|
| 64 |
for task in Tasks:
|
| 65 |
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda: ColumnContent(task.value.col_name, "number", True))])
|
|
|
|
| 59 |
|
| 60 |
auto_eval_column_dict.append(["organization", ColumnContent, field(default_factory=lambda: ColumnContent("Organization", "str", False))])
|
| 61 |
auto_eval_column_dict.append(["knowledge_cutoff", ColumnContent, field(default_factory=lambda: ColumnContent("Knowledge cutoff", "str", False))])
|
| 62 |
+
auto_eval_column_dict.append(["score", ColumnContent, field(default_factory=lambda: ColumnContent("Score", "number", True))])
|
| 63 |
|
| 64 |
for task in Tasks:
|
| 65 |
auto_eval_column_dict.append([task.name, ColumnContent, field(default_factory=lambda: ColumnContent(task.value.col_name, "number", True))])
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -34,11 +34,13 @@ class ModelResult:
|
|
| 34 |
license = config.get("license")
|
| 35 |
knowledge_cutoff = config.get("knowledge_cutoff")
|
| 36 |
|
|
|
|
|
|
|
| 37 |
# Extract results available in this file (some results are split in several files)
|
| 38 |
results = {}
|
| 39 |
for domain in Domains:
|
| 40 |
domain = domain.value
|
| 41 |
-
results[domain.dimension] =
|
| 42 |
|
| 43 |
return self(
|
| 44 |
eval_name=f"{org}_{model}",
|
|
@@ -53,13 +55,13 @@ class ModelResult:
|
|
| 53 |
def to_dict(self):
|
| 54 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
average = 1
|
| 58 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
| 59 |
data_dict = {
|
| 60 |
-
"eval_name": self.eval_name, # not a column, just a save name,
|
| 61 |
# AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
| 62 |
AutoEvalColumn.model.name: self.model,
|
|
|
|
| 63 |
AutoEvalColumn.license.name: self.license,
|
| 64 |
AutoEvalColumn.organization.name: self.org,
|
| 65 |
AutoEvalColumn.knowledge_cutoff.name: self.knowledge_cutoff,
|
|
|
|
| 34 |
license = config.get("license")
|
| 35 |
knowledge_cutoff = config.get("knowledge_cutoff")
|
| 36 |
|
| 37 |
+
model_results = data.get("results")
|
| 38 |
+
|
| 39 |
# Extract results available in this file (some results are split in several files)
|
| 40 |
results = {}
|
| 41 |
for domain in Domains:
|
| 42 |
domain = domain.value
|
| 43 |
+
results[domain.dimension] = model_results.get(domain.dimension).get(domain.metric, None)
|
| 44 |
|
| 45 |
return self(
|
| 46 |
eval_name=f"{org}_{model}",
|
|
|
|
| 55 |
def to_dict(self):
|
| 56 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 57 |
|
| 58 |
+
# score = 1 / self.results[Domains.dim0.dimension] if self.results[Domains.dim0.dimension] != 0 else 0
|
|
|
|
| 59 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
| 60 |
data_dict = {
|
| 61 |
+
# "eval_name": self.eval_name, # not a column, just a save name,
|
| 62 |
# AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
| 63 |
AutoEvalColumn.model.name: self.model,
|
| 64 |
+
AutoEvalColumn.score.name: self.results[Domains.dim0.value.dimension],
|
| 65 |
AutoEvalColumn.license.name: self.license,
|
| 66 |
AutoEvalColumn.organization.name: self.org,
|
| 67 |
AutoEvalColumn.knowledge_cutoff.name: self.knowledge_cutoff,
|
src/populate.py
CHANGED
|
@@ -14,7 +14,7 @@ def get_model_leaderboard_df(results_path: str, requests_path: str="", cols: lis
|
|
| 14 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 15 |
|
| 16 |
df = pd.DataFrame.from_records(all_data_json)
|
| 17 |
-
|
| 18 |
# print(cols) # []
|
| 19 |
# print(df.columns) # ['eval_name', 'Model', 'Hub License', 'Organization', 'Knowledge cutoff', 'Overall']
|
| 20 |
# exit()
|
|
|
|
| 14 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 15 |
|
| 16 |
df = pd.DataFrame.from_records(all_data_json)
|
| 17 |
+
df = df.sort_values(by=[AutoEvalColumn.score.name], ascending=True)
|
| 18 |
# print(cols) # []
|
| 19 |
# print(df.columns) # ['eval_name', 'Model', 'Hub License', 'Organization', 'Knowledge cutoff', 'Overall']
|
| 20 |
# exit()
|