future-xy committed · Commit d936aea · 1 parent: 88d1c0e

improve local debug

Files changed:
- backend-cli.py (+19 -11)
- src/backend/envs.py (+0 -2)
backend-cli.py
CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
 from src.backend.run_eval_suite import run_evaluation
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
 from src.backend.sort_queue import sort_models_by_priority
-from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, LIMIT, Task
+from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, Task
 from src.backend.manage_requests import EvalRequest
 from src.leaderboard.read_evals import EvalResult
 
@@ -122,7 +122,7 @@ def request_to_result_name(request: EvalRequest) -> str:
     return res
 
 
-def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
+def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     try:
         results = run_evaluation(
@@ -132,7 +132,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
             batch_size=batch_size,
             device=DEVICE,
             use_cache=None,
-            limit=LIMIT,
+            limit=limit,
         )
     except RuntimeError as e:
         if "No executable batch size found" in str(e):
@@ -144,7 +144,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
                 batch_size=batch_size,
                 device=DEVICE,
                 use_cache=None,
-                limit=LIMIT,
+                limit=limit,
             )
         else:
             raise
@@ -395,6 +395,12 @@ def process_pending_requests() -> bool:
 def get_args():
     parser = argparse.ArgumentParser(description="Run the backend")
     parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    # debug parameters
+    parser.add_argument("--task", type=str, default="selfcheckgpt", help="Task to debug")
+    parser.add_argument("--model", type=str, default="facebook/opt-1.3b", help="Model to debug")
+    parser.add_argument("--precision", type=str, default="float16", help="Precision to debug")
+    parser.add_argument("--inference-framework", type=str, default="hf-chat", help="Inference framework to debug")
+    parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
     return parser.parse_args()
 
 
@@ -403,11 +409,8 @@ if __name__ == "__main__":
     local_debug = args.debug
     # debug specific task by ping
    if local_debug:
-
-
-        # debug_model_names = ["TheBloke/Mixtral-8x7B-v0.1-GPTQ"]
-        debug_task_name = 'selfcheckgpt'
-        # debug_task_name = "mmlu"
+        debug_model_names = [args.model]  # Use model from arguments
+        debug_task_name = args.task  # Use task from arguments
         task_lst = TASKS_HARNESS.copy()
         for task in task_lst:
             for debug_model_name in debug_model_names:
@@ -415,9 +418,14 @@ if __name__ == "__main__":
                 if task_name != debug_task_name:
                     continue
                 eval_request = EvalRequest(
-                    model=debug_model_name,
+                    model=debug_model_name,
+                    private=False,
+                    status="",
+                    json_filepath="",
+                    precision=args.precision,  # Use precision from arguments
+                    inference_framework=args.inference_framework  # Use inference framework from arguments
                 )
-                results = process_evaluation(task, eval_request)
+                results = process_evaluation(task, eval_request, limit=args.limit)
     else:
         while True:
             res = False
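With these flags, a single task/model pair can be exercised locally instead of entering the queue loop in the else branch. Assuming the script is launched directly with Python, a debug run would look something like the line below; the --limit 10 value is only an illustrative sample cap, and the other values simply repeat the defaults added above:

    python backend-cli.py --debug --task selfcheckgpt --model facebook/opt-1.3b --precision float16 --inference-framework hf-chat --limit 10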
src/backend/envs.py
CHANGED
@@ -64,5 +64,3 @@ EVAL_REQUESTS_PATH_BACKEND_SYNC = os.path.join(CACHE_PATH, "eval-queue-bk-sync")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
-LIMIT = None  # Testing; needs to be None