# Source-viewer residue, kept as a comment so the module parses:
# rzanoli's picture | Small changes | cae4d0f | raw | history blame | 3.36 kB
import glob
import json
import math
import os
from dataclasses import dataclass
from src.display.formatting import make_clickable_model
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType
from src.submission.check_validity import is_model_on_hub
@dataclass
class EvalResult:
    """Scores and metadata for one model evaluation, loaded from a results JSON file.

    Instances are normally created through :meth:`init_from_json_file`.
    """

    eval_name: str  # "<model>_<num_fewshot>"
    full_model: str  # "<org>/<model>", or just "<model>" when there is no org
    org: str  # None when the model name has no "<org>/" prefix
    model: str
    revision: str  # "model_sha" from the config, "" when absent
    results: dict  # benchmark name -> score formatted with two decimals
    average_CPS: str  # top-level "average_CPS" formatted with two decimals
    # NOTE(review): init_from_json_file stores the FewShotType here, not the
    # integer shot count; confirm what consumers of this field expect before
    # changing either the annotation or the assignment.
    fewshot: int
    fewshot_type: FewShotType = FewShotType.Unknown
    weight_type: WeightType = WeightType.Original
    architecture: str = "Unknown"
    license: str = "?"
    likes: int = 0
    num_params: int = 0  # "num_params_billion" rounded up to an integer
    date: str = ""
    still_on_hub: bool = False

    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Build an ``EvalResult`` from a single results JSON file.

        The file is read for a ``"config"`` object (``model_name``,
        ``model_sha``, ``num_fewshot``, ``num_params_billion``), a ``"tasks"``
        object mapping benchmark names to metric dicts, and a top-level
        ``"average_CPS"`` number. Missing or null entries fall back to
        defaults (empty config, no tasks, a score of 0) instead of raising.

        Args:
            json_filepath: Path of the JSON file to read.

        Returns:
            A new ``EvalResult`` populated from the file.
        """
        # Read as UTF-8 explicitly rather than relying on the platform's
        # locale encoding.
        with open(json_filepath, encoding="utf-8") as fp:
            data = json.load(fp)

        # ``or {}`` also covers keys that are present but null.
        config = data.get("config") or {}
        task_scores = data.get("tasks") or {}

        average = data.get("average_CPS") or 0
        average_CPS = f"{average:.2f}"

        num_fewshot = int(config.get("num_fewshot") or 0)
        fewshot_type = FewShotType.from_num_fewshot(num_fewshot)

        params_billion = config.get("num_params_billion")
        num_params = math.ceil(params_billion) if params_billion else 0

        # Split "<org>/<model>" on the first slash only; a name without a
        # slash has no organisation.
        model_name = config.get("model_name") or ""
        org, slash, model = model_name.partition("/")
        if not slash:
            org, model = None, model_name
        full_model = f"{org}/{model}" if org else model

        still_on_hub, _, model_config = is_model_on_hub(full_model, config.get("model_sha", "main"))
        if model_config:
            # ``architectures`` may exist but be None; treat that like an
            # empty list instead of letting ``join`` raise.
            architecture = ";".join(getattr(model_config, "architectures", None) or [])
        else:
            architecture = "?"

        # NOTE(review): the benchmark name is read from ``task.value`` while the
        # metric key is read from the enum member itself; confirm that
        # ``Tasks`` members expose ``metric_type``.
        results = {
            task.value.benchmark: f"{task_scores.get(task.value.benchmark, {}).get(task.metric_type, 0):.2f}"
            for task in Tasks
        }

        return cls(
            eval_name=f"{model}_{num_fewshot}",
            full_model=full_model,
            org=org,
            model=model,
            results=results,
            average_CPS=average_CPS,
            fewshot=fewshot_type,
            fewshot_type=fewshot_type,
            revision=config.get("model_sha", ""),
            still_on_hub=still_on_hub,
            architecture=architecture,
            num_params=num_params,
        )
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    """Load every ``.json`` result file under ``results_path`` as ``EvalResult`` objects.

    The directory tree is walked recursively. Within each directory, files
    are visited in descending order of the text after their last underscore.
    When several files yield the same ``eval_name``, the first one parsed is
    kept and each later one only overwrites entries of its ``results`` dict.

    Args:
        results_path: Root directory to search for result files.
        requests_path: Not used by this function.

    Returns:
        The merged results whose ``to_dict()`` call does not raise
        ``KeyError``.
    """

    def _last_underscore_part(filename):
        return filename.split("_")[-1]

    # Phase 1: collect all candidate paths before parsing anything.
    json_paths = []
    for dirpath, _, filenames in os.walk(results_path):
        for filename in sorted(filenames, key=_last_underscore_part, reverse=True):
            if filename.endswith(".json"):
                json_paths.append(os.path.join(dirpath, filename))

    # Phase 2: parse each file and merge runs that share an eval_name.
    merged = {}
    for path in json_paths:
        parsed = EvalResult.init_from_json_file(path)
        name = parsed.eval_name
        if name in merged:
            merged[name].results.update(parsed.results)
        else:
            merged[name] = parsed

    # Phase 3: keep only entries that can be converted to a dict.
    # NOTE(review): ``to_dict`` is not defined in the part of ``EvalResult``
    # visible in this file; confirm it is provided elsewhere.
    complete = []
    for candidate in merged.values():
        try:
            candidate.to_dict()
        except KeyError:
            continue
        complete.append(candidate)
    return complete