save just the eval main results
evaluation_logic.py  CHANGED  (+20 -1)
@@ -57,15 +57,34 @@ def save_prediction(inference_api, model_name, prompt_format, question, generate
 def save_evaluation(inference_api, model_name, prompt_format, metrics):
     evaluation_file = evaluation_folder / f"evaluation_{file_uuid}.json"
     evaluation_folder.mkdir(parents=True, exist_ok=True)
+
+    # Extract only the category-specific execution metrics
+    categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
+    simplified_metrics = {}
+
+    for category in categories:
+        if category in metrics['exec']:
+            category_metrics = metrics['exec'][category]
+            simplified_metrics[category] = {
+                'count': category_metrics['count'],
+                'execution_accuracy': category_metrics['exec']
+            }
+        else:
+            simplified_metrics[category] = {
+                'count': 0,
+                'execution_accuracy': 0.0
+            }
+
     with evaluation_scheduler.lock:
         with evaluation_file.open("a") as f:
             json.dump({
                 "inference_api": inference_api,
                 "model_name": model_name,
                 "prompt_format": prompt_format,
-                "
+                "category_metrics": simplified_metrics,
                 "timestamp": datetime.now().isoformat()
             }, f)
+            f.write('\n')
 
 def run_prediction(inference_api, model_name, prompt_format, output_file):
     dataset_path = str(eval_dir / "data/dev.json")
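
Because each record is now followed by f.write('\n'), the evaluation file is effectively newline-delimited JSON, with one record appended per call. Below is a minimal sketch of how the saved results could be read back; load_evaluations and the "evaluations" folder name are hypothetical illustrations, not part of this repo, and it assumes every record was written by the updated save_evaluation above.

import json
from pathlib import Path

def load_evaluations(evaluation_folder):
    # Hypothetical helper: collect every appended record (one JSON object per line)
    # from the evaluation_*.json files produced by save_evaluation.
    records = []
    for evaluation_file in Path(evaluation_folder).glob("evaluation_*.json"):
        with evaluation_file.open() as f:
            for line in f:
                line = line.strip()
                if line:  # skip blank separator lines, if any
                    records.append(json.loads(line))
    return records

# Example usage (folder name is an assumption):
for record in load_evaluations("evaluations"):
    overall = record["category_metrics"]["all"]
    print(record["model_name"], overall["count"], overall["execution_accuracy"])

Per the diff, each record's category_metrics maps every category in ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all'] to a count and an execution_accuracy, with zeros filled in for categories absent from metrics['exec'].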