Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

1147

Alina Lozovskaia committed on Apr 25, 2024

Commit

79b2cd5

1 Parent(s): dadbd30

wip improvement

Browse files

Files changed (1) hide show

src/leaderboard/read_evals.py +27 -25

src/leaderboard/read_evals.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-import dateutil.parser
 from json import JSONDecodeError
 import logging
 import math
@@ -189,39 +189,40 @@ def get_request_file_for_model(requests_path, model_name, precision):
     # Return empty string if no file found that matches criteria
     return request_file
-def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
-    """From the path of the results folder root, extract all needed info for results"""
-    model_result_filepaths = []
-    results_path = Path(results_path)
-    for root in results_path.rglob('*'):
-        # root is now a Path object representing directories
-        files = list(root.glob('*.json'))  # Collect all .json files directly
-        # Check if the directory is empty or contains no .json files
-        if not files:
-            continue
-        # Sort the files by extracting the datetime from filenames assumed to be of the form "results_YYYYMMDD.json"
         try:
-            files.sort(key=lambda x: x.stem.removeprefix("results_"))
-        except dateutil.parser._parser.ParserError:
-            files = [files[-1]]
-        for file in files:
-            # Construct file path correctly, ensuring no duplication of path parts
-            model_result_filepath = file.resolve()
-            model_result_filepaths.append(model_result_filepath)
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
     eval_results = {}
-    for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
         if eval_result.full_model in dynamic_data:
             eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
             # Hardcoding because of gating problem
@@ -236,12 +237,13 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
             eval_results[eval_name] = eval_result
     results = []
-    for v in eval_results.values():
         try:
             if v.status == "FINISHED":
                 v.to_dict()  # we test if the dict version is complete
                 results.append(v)
-        except KeyError:  # not all eval values present
             continue
     return results

 import json
 from pathlib import Path
+from datetime import datetime
 from json import JSONDecodeError
 import logging
 import math
     # Return empty string if no file found that matches criteria
     return request_file
def parse_datetime(datetime_str):
    """Parse a result-file timestamp into a naive ``datetime``.

    The date part is ``YYYY-MM-DD`` followed by ``T``; the time part may use
    dashes, colons or spaces between hours, minutes and seconds. A trailing
    fractional-seconds component (``.ffffff``) is accepted but not required.

    Args:
        datetime_str: Timestamp text, e.g. ``"2024-04-25T10-30-00.123456"``.

    Returns:
        The parsed ``datetime``. If none of the known formats match, an error
        is logged and ``datetime(1970, 1, 1)`` is returned so that such
        entries sort before every correctly dated one.
    """
    formats = (
        "%Y-%m-%dT%H-%M-%S.%f",  # Format with dashes
        "%Y-%m-%dT%H:%M:%S.%f",  # Standard format with colons
        "%Y-%m-%dT%H %M %S.%f",  # Spaces as separator
        # Same three layouts without fractional seconds, so a valid
        # whole-second timestamp is not mistaken for a malformed one.
        "%Y-%m-%dT%H-%M-%S",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H %M %S",
    )
    for fmt in formats:
        try:
            return datetime.strptime(datetime_str, fmt)
        except ValueError:
            # This layout did not match; try the next one.
            continue
    # in rare cases set unix start time for files with incorrect time (legacy files)
    logging.error(f"No valid date format found for: {datetime_str}")
    return datetime(1970, 1, 1)
+def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
+    """From the path of the results folder root, extract all needed info for results"""
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
+    results_path = Path(results_path)
+    model_files = list(results_path.rglob('results_*.json'))
+    model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
     eval_results = {}
+    for model_result_filepath in model_files:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
         if eval_result.full_model in dynamic_data:
             eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
             # Hardcoding because of gating problem
             eval_results[eval_name] = eval_result
     results = []
+    for k, v in eval_results.items():
         try:
             if v.status == "FINISHED":
                 v.to_dict()  # we test if the dict version is complete
                 results.append(v)
+        except KeyError as e:
+            logging.error(f"Error while checking model {k} dict, no key: {e}")  # not all eval values present
             continue
     return results