p50038325
committed on
Commit
·
f834299
1
Parent(s):
ea9345e
energy-score
Browse files
- src/leaderboard/read_evals.py +20 -10
src/leaderboard/read_evals.py
CHANGED
@@ -223,16 +223,26 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
223 |
|
224 |
eval_results = {}
|
225 |
for model_result_filepath in model_result_filepaths:
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
results = []
|
238 |
for v in eval_results.values():
|
|
|
223 |
|
224 |
eval_results = {}
|
225 |
for model_result_filepath in model_result_filepaths:
|
226 |
+
try:
|
227 |
+
# Creation of result
|
228 |
+
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
229 |
+
|
230 |
+
# Skip entries with Unknown/Unknown model name
|
231 |
+
if eval_result.full_model == "Unknown/Unknown":
|
232 |
+
print(f"Skipping invalid result file: {model_result_filepath}")
|
233 |
+
continue
|
234 |
+
|
235 |
+
eval_result.update_with_request_file(requests_path)
|
236 |
+
|
237 |
+
# Store results of same eval together
|
238 |
+
eval_name = eval_result.eval_name
|
239 |
+
if eval_name in eval_results.keys():
|
240 |
+
eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
|
241 |
+
else:
|
242 |
+
eval_results[eval_name] = eval_result
|
243 |
+
except Exception as e:
|
244 |
+
print(f"Error processing result file {model_result_filepath}: {str(e)}")
|
245 |
+
continue
|
246 |
|
247 |
results = []
|
248 |
for v in eval_results.values():
|