Spaces:
Running
on
Zero
Running
on
Zero
| import json | |
| import os | |
| from collections import defaultdict | |
| import numpy as np | |
| import argparse | |
def parse_args(argv=None):
    """Parse the command-line options of the review summarizer.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, so existing
            zero-argument callers behave as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``dir``, ``version``,
        ``select``, ``files`` and ``ignore``.
    """
    parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
    parser.add_argument(
        '-d', '--dir', default=None,
        help='directory that contains the review .jsonl files')
    parser.add_argument(
        '-v', '--version', default=None,
        help='only summarize files of this version (0613 or 0314)')
    parser.add_argument(
        '-s', '--select', nargs='*', default=None,
        help='substrings that must all occur in a file\'s config name')
    parser.add_argument(
        '-f', '--files', nargs='*', default=[],
        help='explicit review files; when given, the directory is not scanned')
    # Question ids are compared as integers downstream, so reject
    # non-numeric values here with a proper usage error.
    parser.add_argument(
        '-i', '--ignore', nargs='*', type=int, default=[],
        help='question ids to leave out of the summary')
    return parser.parse_args(argv)
def _list_review_files(args):
    """Return the review file names to summarize, sorted.

    Explicitly passed ``args.files`` win; otherwise ``args.dir`` (or the
    current directory when it is ``None``) is scanned for ``.jsonl`` files
    that carry a known review prefix, or for any ``.jsonl`` file when the
    directory name itself contains ``'review'``.
    """
    if args.files:
        return sorted(args.files)
    # --dir is optional: never run the substring test against None.
    dir_is_review = args.dir is not None and 'review' in args.dir
    scan_dir = args.dir if args.dir is not None else '.'
    return sorted(
        name for name in os.listdir(scan_dir)
        if name.endswith('.jsonl')
        and (name.startswith(('gpt4_text', 'reviews_', 'review_')) or dir_is_review)
    )


def _load_scores(path, ignored_ids):
    """Read one review ``.jsonl`` file and group its score entries.

    Every kept review contributes to the ``'all'`` group; reviews with a
    ``'category'`` key also contribute to that category's group. The entry
    is taken from ``review['tuple']`` when present, else ``review['score']``.
    """
    scores = defaultdict(list)
    with open(path, encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines instead of failing in json.loads.
            if not line.strip():
                continue
            review = json.loads(line)
            if review['question_id'] in ignored_ids:
                continue
            if 'category' in review:
                scores[review['category']].append(review['tuple'])
                scores['all'].append(review['tuple'])
            elif 'tuple' in review:
                scores['all'].append(review['tuple'])
            else:
                scores['all'].append(review['score'])
    return scores


def _print_summary(scores):
    """Print one line per group: relative score and the two scaled means.

    Each line shows the group name, the second mean as a percentage of the
    first mean, and both means multiplied by 10.
    """
    for key, values in sorted(scores.items()):
        stats = [round(x, 3) for x in np.asarray(values).mean(0).tolist()]
        # A zero first mean has no meaningful ratio; report nan, don't crash.
        ratio = round(stats[1] / stats[0] * 100, 1) if stats[0] else float('nan')
        print(key, ratio, round(stats[0] * 10, 1), round(stats[1] * 10, 1))


def main(args):
    """Summarize every selected review file described by ``args``.

    Args:
        args: Namespace with ``dir``, ``version``, ``select``, ``files`` and
            ``ignore`` attributes, as produced by ``parse_args``.
    """
    ignored_ids = [int(x) for x in (args.ignore or [])]
    for review_file in _list_review_files(args):
        config = os.path.basename(review_file).replace('gpt4_text_', '').replace('.jsonl', '')
        if args.select is not None and any(x not in config for x in args.select):
            continue
        # The version is inferred from the file name only.
        version = '0613' if '0613' in config else '0314'
        if args.version is not None and args.version != version:
            continue
        print(config)
        path = os.path.join(args.dir, review_file) if args.dir is not None else review_file
        _print_summary(_load_scores(path, ignored_ids))
        print('=================================')


if __name__ == '__main__':
    main(parse_args())