Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
•
79b2cd5
1
Parent(s):
dadbd30
wip improvement
Browse files- src/leaderboard/read_evals.py +27 -25
src/leaderboard/read_evals.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import json
|
2 |
from pathlib import Path
|
3 |
-
import
|
4 |
from json import JSONDecodeError
|
5 |
import logging
|
6 |
import math
|
@@ -189,39 +189,40 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
189 |
# Return empty string if no file found that matches criteria
|
190 |
return request_file
|
191 |
|
192 |
-
def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
|
193 |
-
"""From the path of the results folder root, extract all needed info for results"""
|
194 |
-
model_result_filepaths = []
|
195 |
-
|
196 |
-
results_path = Path(results_path)
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
# Sort the files by extracting the datetime from filenames assumed to be of the form "results_YYYYMMDD.json"
|
207 |
try:
|
208 |
-
|
209 |
-
except
|
210 |
-
|
|
|
|
|
|
|
211 |
|
212 |
-
for file in files:
|
213 |
-
# Construct file path correctly, ensuring no duplication of path parts
|
214 |
-
model_result_filepath = file.resolve()
|
215 |
-
model_result_filepaths.append(model_result_filepath)
|
216 |
|
|
|
|
|
217 |
with open(dynamic_path) as f:
|
218 |
dynamic_data = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
eval_results = {}
|
221 |
-
for model_result_filepath in
|
222 |
# Creation of result
|
223 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
224 |
eval_result.update_with_request_file(requests_path)
|
|
|
225 |
if eval_result.full_model in dynamic_data:
|
226 |
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|
227 |
# Hardcoding because of gating problem
|
@@ -236,12 +237,13 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
|
|
236 |
eval_results[eval_name] = eval_result
|
237 |
|
238 |
results = []
|
239 |
-
for v in eval_results.
|
240 |
try:
|
241 |
if v.status == "FINISHED":
|
242 |
v.to_dict() # we test if the dict version is complete
|
243 |
results.append(v)
|
244 |
-
except KeyError
|
|
|
245 |
continue
|
246 |
|
247 |
return results
|
|
|
1 |
import json
|
2 |
from pathlib import Path
|
3 |
+
from datetime import datetime
|
4 |
from json import JSONDecodeError
|
5 |
import logging
|
6 |
import math
|
|
|
189 |
# Return empty string if no file found that matches criteria
|
190 |
return request_file
|
191 |
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
+
def parse_datetime(datetime_str):
    """Parse a result-file timestamp string into a naive ``datetime``.

    The date part must be ``YYYY-MM-DD`` followed by ``T``; the time part may
    use dashes, colons or spaces between hours, minutes and seconds. The
    fractional-seconds suffix is optional, because ``datetime.isoformat()``
    omits it when the microsecond field is zero.

    Args:
        datetime_str: Timestamp text, e.g. ``"2024-03-01T12-30-45.123456"``.

    Returns:
        The parsed ``datetime``, or ``datetime(1970, 1, 1)`` when no known
        format matches (the failure is logged as an error).
    """
    formats = (
        "%Y-%m-%dT%H-%M-%S.%f",  # Format with dashes
        "%Y-%m-%dT%H:%M:%S.%f",  # Standard format with colons
        "%Y-%m-%dT%H %M %S.%f",  # Spaces as separator
        # Same three layouts without fractional seconds
        "%Y-%m-%dT%H-%M-%S",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H %M %S",
    )

    for fmt in formats:
        try:
            return datetime.strptime(datetime_str, fmt)
        except ValueError:
            # This layout did not match; try the next one
            continue

    # in rare cases set unix start time for files with incorrect time (legacy files)
    logging.error(f"No valid date format found for: {datetime_str}")
    return datetime(1970, 1, 1)
|
208 |
|
|
|
|
|
|
|
|
|
209 |
|
210 |
+
def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
|
211 |
+
"""From the path of the results folder root, extract all needed info for results"""
|
212 |
with open(dynamic_path) as f:
|
213 |
dynamic_data = json.load(f)
|
214 |
+
|
215 |
+
results_path = Path(results_path)
|
216 |
+
|
217 |
+
model_files = list(results_path.rglob('results_*.json'))
|
218 |
+
model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
|
219 |
|
220 |
eval_results = {}
|
221 |
+
for model_result_filepath in model_files:
|
222 |
# Creation of result
|
223 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
224 |
eval_result.update_with_request_file(requests_path)
|
225 |
+
|
226 |
if eval_result.full_model in dynamic_data:
|
227 |
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|
228 |
# Hardcoding because of gating problem
|
|
|
237 |
eval_results[eval_name] = eval_result
|
238 |
|
239 |
results = []
|
240 |
+
for k, v in eval_results.items():
|
241 |
try:
|
242 |
if v.status == "FINISHED":
|
243 |
v.to_dict() # we test if the dict version is complete
|
244 |
results.append(v)
|
245 |
+
except KeyError as e:
|
246 |
+
logging.error(f"Error while checking model {k} dict, no key: {e}") # not all eval values present
|
247 |
continue
|
248 |
|
249 |
return results
|