Spaces:
Running
Running
| import os | |
| import re | |
| import json | |
| import tyro | |
| from collections import namedtuple | |
# Key identifying one benchmark result file: the task label passed in by the
# caller, the name of the directory holding the file, and the batch size
# parsed from the file name. The typename matches the bound name so that
# repr() and pickling refer to the same identifier.
ModelBatch = namedtuple('ModelBatch', ['task', 'model_name', 'batch'])

# Maps every ModelBatch seen so far to True (enough results were collected)
# or False (too few results, or the file could not be parsed).
model_state = {}


def load_and_print_length(task: str, root_dir: str) -> None:
    """Scan a directory tree for benchmark result files and record completeness.

    Walks ``root_dir`` recursively. For every file named exactly
    ``benchmark_batch_<N>.json`` it prints the containing directory name and
    the batch size ``N``, loads the JSON, prints its length, and stores the
    verdict in the module-level ``model_state`` dict under
    ``ModelBatch(task, <directory name>, N)``.

    A file counts as complete when ``len(data) * N >= 2000``. Files that are
    not valid UTF-8 JSON are reported and recorded as incomplete.

    Args:
        task: Label stored in the ``task`` field of each recorded key.
        root_dir: Directory to walk. A missing directory yields no entries,
            because ``os.walk`` ignores listing errors by default.
    """
    # The dot is escaped and fullmatch() is used so that look-alikes such as
    # "benchmark_batch_8.json.bak" or "benchmark_batch_8xjson" are skipped.
    pattern = re.compile(r'benchmark_batch_(\d+)\.json')

    for subdir, _, files in os.walk(root_dir):
        model_name = os.path.basename(subdir)
        for file in files:
            match = pattern.fullmatch(file)
            if match is None:
                continue

            file_path = os.path.join(subdir, file)
            batch = int(match.group(1))
            print(f"Model: {model_name}, Batch size: {batch}")
            model_batch = ModelBatch(task=task, model_name=model_name, batch=batch)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                print(f"[ERR] could not parse results in {file_path}")
                model_state[model_batch] = False
            else:
                print(f"Length: {len(data)}")
                # Same threshold as len(data) >= 2000 / batch for positive
                # batch sizes, but written as a product so a file named
                # benchmark_batch_0.json is marked incomplete instead of
                # raising ZeroDivisionError.
                # NOTE(review): presumably each top-level JSON entry is one
                # processed batch -- confirm against the benchmark writer.
                model_state[model_batch] = len(data) * batch >= 2000
            print("------")
def main(data_dir: str) -> None:
    """Summarize the results collected for all models in the given directory.

    Args:
        data_dir: The directory containing the results.
    """
    # NOTE: tyro.cli builds the command-line help from the docstring above,
    # so its wording is user-facing and is kept unchanged.

    # Each task has its own sub-directory directly under data_dir.
    task_names = ('chat', 'chat-concise', 'instruct', 'instruct-concise')
    for task_name in task_names:
        print(task_name)
        load_and_print_length(task_name, f"{data_dir}/{task_name}")

    # Split the recorded entries by verdict, keeping insertion order.
    finished = [key for key, ok in model_state.items() if ok is True]
    unfinished = [key for key, ok in model_state.items() if ok is not True]

    print("complete instance:")
    for key in finished:
        print(key)
    print("------")
    print("incomplete instance:")
    for key in unfinished:
        print(key)
# Script entry point: tyro.cli turns main()'s signature into a command-line
# interface, so data_dir is supplied as a CLI argument.
if __name__ == "__main__":
    tyro.cli(main)