|
import glob
|
|
import json
|
|
import math
|
|
import os
|
|
from dataclasses import dataclass
|
|
from src.display.formatting import make_clickable_model
|
|
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType
|
|
from src.submission.check_validity import is_model_on_hub
|
|
|
|
@dataclass
class EvalResult:
    """Scores and metadata for one evaluated model at one few-shot setting.

    Instances are normally created from a results JSON file through
    :meth:`init_from_json_file`.
    """

    eval_name: str  # "<model>_<num_fewshot>"
    full_model: str  # "<org>/<model>", or just "<model>" when there is no org
    org: str  # None when the configured model name contains no "/"
    model: str
    revision: str  # config["model_sha"], or "" when the key is absent
    results: dict  # benchmark name -> score formatted with two decimals
    average_CPS: str  # top-level "average_CPS" formatted with two decimals
    # NOTE(review): annotated as int, but init_from_json_file stores the
    # FewShotType member here (the same object as fewshot_type). Confirm
    # against the consumers of this field before changing either side.
    fewshot: int
    fewshot_type: FewShotType = FewShotType.Unknown
    weight_type: WeightType = WeightType.Original
    architecture: str = "Unknown"
    license: str = "?"
    likes: int = 0
    num_params: int = 0
    date: str = ""
    still_on_hub: bool = False

    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Build an ``EvalResult`` from a results JSON file.

        The file must contain a ``"config"`` object, a ``"tasks"`` object and
        a numeric ``"average_CPS"`` entry.

        Args:
            json_filepath: Path of the JSON results file to read.

        Returns:
            A populated ``EvalResult``.

        Raises:
            KeyError: If the file has no ``"tasks"`` entry.
            TypeError: If ``"average_CPS"`` is missing or not a number.
            AttributeError: If the file has no ``"config"`` entry.
        """
        # JSON is read as UTF-8 explicitly so that decoding does not depend
        # on the locale of the machine running the leaderboard.
        with open(json_filepath, encoding="utf-8") as fp:
            data = json.load(fp)

        config = data.get("config")
        average_CPS = f"{data.get('average_CPS'):.2f}"

        num_fewshot = int(config.get("num_fewshot", 0))
        fewshot_type = FewShotType.from_num_fewshot(num_fewshot)

        # A missing or falsy parameter count is reported as 0; otherwise the
        # value (in billions) is rounded up to the next integer.
        num_params = math.ceil(config.get("num_params_billion") or 0)

        # "org/model" -> (org, model); a bare "model" has no organisation.
        name_parts = config.get("model_name", "").split("/", 1)
        if len(name_parts) == 2:
            org, model = name_parts
        else:
            org, model = None, name_parts[0]
        full_model = f"{org}/{model}" if org else model

        still_on_hub, _, model_config = is_model_on_hub(
            full_model, config.get("model_sha", "main")
        )

        if model_config:
            # The attribute may exist but be None (presumably a Hugging Face
            # config object - verify against is_model_on_hub); treat that
            # the same as a missing attribute instead of failing in join().
            architectures = getattr(model_config, "architectures", None) or []
            architecture = ";".join(architectures)
        else:
            architecture = "?"

        # One entry per known task; tasks or metrics absent from the file
        # are reported as 0.
        task_scores = data["tasks"]
        results = {}
        for task in Tasks:
            score = task_scores.get(task.value.benchmark, {}).get(task.metric_type, 0)
            results[task.value.benchmark] = f"{score:.2f}"

        return cls(
            eval_name=f"{model}_{num_fewshot}",
            full_model=full_model,
            org=org,
            model=model,
            results=results,
            average_CPS=average_CPS,
            fewshot=fewshot_type,
            fewshot_type=fewshot_type,
            revision=config.get("model_sha", ""),
            still_on_hub=still_on_hub,
            architecture=architecture,
            num_params=num_params,
        )
|
|
|
|
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    """Load every results JSON file under ``results_path`` as ``EvalResult`` objects.

    Args:
        results_path: Directory that is walked recursively for ``.json`` files.
        requests_path: Accepted for interface compatibility; not read here.

    Returns:
        One ``EvalResult`` per distinct ``eval_name``, in first-seen order,
        keeping only those whose ``to_dict()`` call does not raise ``KeyError``.
    """
    # Collect the paths first. Within each directory, files are ordered by the
    # text after their last underscore, descending.
    json_paths = []
    for dirpath, _, filenames in os.walk(results_path):
        ordered = sorted(filenames, key=lambda name: name.split("_")[-1], reverse=True)
        json_paths.extend(
            os.path.join(dirpath, name) for name in ordered if name.endswith(".json")
        )

    # Group by eval_name. The first file seen supplies the object; any later
    # file with the same name only overwrites entries of its ``results`` dict.
    by_name = {}
    for path in json_paths:
        parsed = EvalResult.init_from_json_file(path)
        existing = by_name.get(parsed.eval_name)
        if existing is None:
            by_name[parsed.eval_name] = parsed
        else:
            existing.results.update(parsed.results)

    # to_dict() is called only as a check; its return value is discarded and
    # entries for which it raises KeyError are left out.
    complete = []
    for candidate in by_name.values():
        try:
            candidate.to_dict()
        except KeyError:
            continue
        else:
            complete.append(candidate)

    return complete