import glob
import json
import math
import os
from dataclasses import dataclass

from src.display.formatting import make_clickable_model
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType
from src.submission.check_validity import is_model_on_hub


@dataclass
class EvalResult:
    """One model evaluation run, parsed from a results JSON file.

    Instances are built via :meth:`init_from_json_file` and later merged /
    serialized by the leaderboard display code.
    """

    eval_name: str                  # unique key: "<model>_<num_fewshot>"
    full_model: str                 # "org/model" identifier on the Hub
    org: str
    model: str
    revision: str                   # model sha (commit) the run was made on
    results: dict                   # benchmark name -> formatted metric string
    average_CPS: str                # overall score, pre-formatted to 2 decimals
    fewshot: int                    # number of few-shot examples used
    fewshot_type: FewShotType = FewShotType.Unknown
    weight_type: WeightType = WeightType.Original
    architecture: str = "Unknown"
    license: str = "?"
    likes: int = 0
    num_params: int = 0             # parameter count, in billions, rounded up
    date: str = ""
    still_on_hub: bool = False

    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Build an :class:`EvalResult` from a single results JSON file.

        Args:
            json_filepath: path to a JSON file with ``config``, ``tasks``
                and ``average_CPS`` keys.

        Returns:
            A populated :class:`EvalResult`.
        """
        # JSON is UTF-8 by spec; be explicit rather than relying on the locale.
        with open(json_filepath, encoding="utf-8") as fp:
            data = json.load(fp)

        config = data.get("config")

        # NOTE(review): raises TypeError if 'average_CPS' is missing — confirm
        # that every results file is guaranteed to contain it.
        average_CPS = f"{data.get('average_CPS'):.2f}"

        num_fewshot = int(config.get("num_fewshot", 0))
        fewshot_type = FewShotType.from_num_fewshot(num_fewshot)

        # NOTE(review): model_type is parsed (which may validate the string)
        # but is never stored on the instance — confirm whether it should be.
        model_type = ModelType.from_str(config.get("model_type")) if config.get("model_type") else None

        # Stored in billions, rounded up to a whole number.
        num_params = math.ceil(config.get("num_params_billion", 0)) if config.get("num_params_billion") else 0

        # Split "org/model" at the first slash; a bare model name has no org.
        org_and_model = config.get("model_name", "").split("/", 1)
        org, model = org_and_model if len(org_and_model) == 2 else (None, org_and_model[0])
        full_model = "/".join([org, model] if org else [model])

        still_on_hub, _, model_config = is_model_on_hub(full_model, config.get("model_sha", "main"))
        architecture = ";".join(getattr(model_config, "architectures", [])) if model_config else "?"

        # One formatted score per benchmark; missing entries default to 0.
        # NOTE(review): `task.metric_type` is accessed on the enum member while
        # `benchmark` is read from `task.value` — verify this is not meant to
        # be `task.value.metric_type`.
        results = {
            task.value.benchmark: f"{data['tasks'].get(task.value.benchmark, {}).get(task.metric_type, 0):.2f}"
            for task in Tasks
        }

        return cls(
            eval_name=f"{model}_{num_fewshot}",
            full_model=full_model,
            org=org,
            model=model,
            results=results,
            average_CPS=average_CPS,
            # BUG FIX: the int field previously received the FewShotType enum
            # (fewshot=fewshot_type); store the actual shot count instead.
            fewshot=num_fewshot,
            fewshot_type=fewshot_type,
            revision=config.get("model_sha", ""),
            still_on_hub=still_on_hub,
            architecture=architecture,
            num_params=num_params,
        )


def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    """Collect and merge all evaluation results found under ``results_path``.

    Args:
        results_path: root directory that is walked recursively for ``.json``
            result files.
        requests_path: unused; kept for interface compatibility with callers.

    Returns:
        List of merged :class:`EvalResult` objects whose ``to_dict()`` form is
        complete (incomplete entries are silently dropped).
    """
    # Within each directory, visit files newest-first assuming a trailing
    # "_<timestamp>.json" naming scheme, so newer results win on merge order.
    model_result_filepaths = [
        os.path.join(root, file)
        for root, _, files in os.walk(results_path)
        for file in sorted(files, key=lambda x: x.split("_")[-1], reverse=True)
        if file.endswith(".json")
    ]

    eval_results = {}
    for model_result_filepath in model_result_filepaths:
        eval_result = EvalResult.init_from_json_file(model_result_filepath)
        eval_name = eval_result.eval_name
        if eval_name not in eval_results:
            eval_results[eval_name] = eval_result
        else:
            # Same model/few-shot combination seen again: merge per-task scores.
            eval_results[eval_name].results.update(eval_result.results)

    results = []
    for v in eval_results.values():
        try:
            v.to_dict()  # Test if the dict version is complete
            results.append(v)
        except KeyError:
            continue
    return results