Ahmed Ahmed committed on
Commit
36b1a23
·
1 Parent(s): 4864926
src/evaluation/initialize_models.py CHANGED
@@ -73,6 +73,50 @@ def create_model_result_file(model_name, precision="float16"):
73
  sys.stderr.flush()
74
  return None
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def initialize_allowed_models():
77
  """
78
  Initialize result files for all allowed models.
@@ -81,6 +125,9 @@ def initialize_allowed_models():
81
  sys.stderr.write(f"📋 Models to initialize: {ALLOWED_MODELS}\n")
82
  sys.stderr.flush()
83
 
 
 
 
84
  created_files = []
85
 
86
  for model_name in ALLOWED_MODELS:
 
73
  sys.stderr.flush()
74
  return None
75
 
76
def clean_non_allowed_results():
    """
    Delete stored result files whose model is not on the allowed list.

    Walks every ``.json`` file under ``EVAL_RESULTS_PATH``, reads the
    ``config.model_name`` entry from each one, and removes the file when
    ``is_model_allowed`` rejects that name. Files without a model name are
    reported and left in place; files that cannot be read or parsed are
    reported and skipped. Progress is written to stderr. Returns ``None``.
    """
    sys.stderr.write("\n🧹 CLEANING NON-ALLOWED RESULT FILES\n")
    sys.stderr.flush()

    # Nothing to do when the results directory has never been created.
    if not os.path.exists(EVAL_RESULTS_PATH):
        sys.stderr.write("📁 Results directory doesn't exist, nothing to clean\n")
        sys.stderr.flush()
        return

    deleted = 0

    for folder, _subdirs, names in os.walk(EVAL_RESULTS_PATH):
        # Only JSON files are candidate result files.
        for name in (n for n in names if n.endswith('.json')):
            json_path = os.path.join(folder, name)

            try:
                with open(json_path, 'r') as handle:
                    payload = json.load(handle)

                model_name = payload.get("config", {}).get("model_name", "")

                if not model_name:
                    # Without a model name the file cannot be classified, so keep it.
                    sys.stderr.write(f"⚠️ Skipping file with no model_name: {json_path}\n")
                elif not is_model_allowed(model_name):
                    sys.stderr.write(
                        f"🗑️ Removing non-allowed model result: {json_path} (model: {model_name})\n"
                    )
                    os.remove(json_path)
                    deleted += 1

            except Exception as err:
                # Best-effort cleanup: one bad file must not abort the whole sweep.
                sys.stderr.write(f"⚠️ Error processing file {json_path}: {err}\n")
                continue

    sys.stderr.write(f"✅ Removed {deleted} non-allowed result files\n")
    sys.stderr.flush()
119
+
120
  def initialize_allowed_models():
121
  """
122
  Initialize result files for all allowed models.
 
125
  sys.stderr.write(f"📋 Models to initialize: {ALLOWED_MODELS}\n")
126
  sys.stderr.flush()
127
 
128
+ # First, clean up any existing non-allowed results
129
+ clean_non_allowed_results()
130
+
131
  created_files = []
132
 
133
  for model_name in ALLOWED_MODELS:
src/leaderboard/read_evals.py CHANGED
@@ -207,6 +207,24 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
207
  try:
208
  sys.stderr.write(f"\nProcessing file: {model_result_filepath}\n")
209
  sys.stderr.flush()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  # Creation of result
211
  eval_result = EvalResult.init_from_json_file(model_result_filepath)
212
  sys.stderr.write(f"Created result object for: {eval_result.full_model}\n")
 
207
  try:
208
  sys.stderr.write(f"\nProcessing file: {model_result_filepath}\n")
209
  sys.stderr.flush()
210
+
211
+ # Quick pre-check: Try to extract model name from file before full processing
212
+ try:
213
+ with open(model_result_filepath, 'r') as f:
214
+ data = json.load(f)
215
+ config = data.get("config", {})
216
+ model_name = config.get("model_name", "")
217
+
218
+ if model_name and not is_model_allowed(model_name):
219
+ sys.stderr.write(f"⏭️ Skipping non-allowed model file: {model_result_filepath} (model: {model_name})\n")
220
+ sys.stderr.flush()
221
+ continue
222
+
223
+ except Exception as e:
224
+ sys.stderr.write(f"⚠️ Error pre-checking file {model_result_filepath}: {e}\n")
225
+ sys.stderr.flush()
226
+ continue
227
+
228
  # Creation of result
229
  eval_result = EvalResult.init_from_json_file(model_result_filepath)
230
  sys.stderr.write(f"Created result object for: {eval_result.full_model}\n")