Spaces:

sparse-generative-ai
/

open-moe-llm-leaderboard

Runtime error

App Files Files Community

AppleSwing committed on May 2, 2024

Commit

9ffef81

1 Parent(s): f38163c

Fix some bugs

Browse files

Files changed (5) hide show

backend-cli.py +26 -24
src/backend/envs.py +1 -1
src/display/utils.py +1 -0
src/submission/check_validity.py +2 -1
src/utils.py +102 -1

backend-cli.py CHANGED Viewed

@@ -17,7 +17,7 @@ from src.backend.manage_requests import EvalRequest
 from src.leaderboard.read_evals import EvalResult
 from src.envs import QUEUE_REPO, RESULTS_REPO, API, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
-from src.utils import my_snapshot_download, analyze_gpu_stats, parse_nvidia_smi, monitor_gpus
 from src.leaderboard.read_evals import get_raw_eval_results
@@ -142,9 +142,6 @@ def request_to_result_name(request: EvalRequest) -> str:
 def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     batch_size = eval_request.batch_size
-    if args.debug:
-        RESULTS_REPO = DEBUG_RESULTS_REPO
     init_gpu_info = analyze_gpu_stats(parse_nvidia_smi())
     # if init_gpu_info['Mem(M)'] > 500:
@@ -388,21 +385,7 @@ def maybe_refresh_results(thr: int, hard_task_lst: Optional[list[str]] = None) -
     return False
-def get_gpu_details():
-    gpus = GPUtil.getGPUs()
-    gpu = gpus[0]
-    name = gpu.name.replace(" ", "-")
-    # Convert memory from MB to GB and round to nearest whole number
-    memory_gb = round(gpu.memoryTotal / 1024)
-    memory = f"{memory_gb}GB"
-    formatted_name = f"{name}-{memory}"
-    return formatted_name
 def process_pending_requests() -> bool:
-    if args.debug:
-        QUEUE_REPO = DEBUG_QUEUE_REPO
     sanity_checks()
     print("Processing pending requests")
     current_pending_status = [PENDING_STATUS]
@@ -472,6 +455,7 @@ def get_args():
     parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
     parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
                         help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
     return parser.parse_args()
@@ -479,7 +463,7 @@ if __name__ == "__main__":
     args = get_args()
     local_debug = args.debug
     # debug specific task by ping
-    if local_debug:
         # debug_model_names = [args.model]  # Use model from arguments
         # debug_task_name = [args.task]  # Use task from arguments
         debug_model_names = args.model.split(",")
@@ -510,25 +494,43 @@ if __name__ == "__main__":
                     results = process_evaluation(task, eval_request, limit=args.limit)
                     # except Exception as e:
                     #     print(f"debug running error: {e}")
-    else:
         while True:
             res = False
             # if random.randint(0, 10) == 0:
             res = process_pending_requests()
             print(f"waiting for 60 seconds")
             time.sleep(60)
             # if res is False:
             #     if random.randint(0, 5) == 0:
             #         res = maybe_refresh_results(100)
             #     else:
             #         res = process_finished_requests(100)
             # time.sleep(60)
             # if res is False:
             #     if random.randint(0, 5) == 0:
             #         res = maybe_refresh_results(0)
             #     else:
             #         res = process_finished_requests(0)

 from src.leaderboard.read_evals import EvalResult
 from src.envs import QUEUE_REPO, RESULTS_REPO, API, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
+from src.utils import my_snapshot_download, analyze_gpu_stats, parse_nvidia_smi, monitor_gpus, get_gpu_details
 from src.leaderboard.read_evals import get_raw_eval_results
 def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     batch_size = eval_request.batch_size
     init_gpu_info = analyze_gpu_stats(parse_nvidia_smi())
     # if init_gpu_info['Mem(M)'] > 500:
     return False
 def process_pending_requests() -> bool:
     sanity_checks()
     print("Processing pending requests")
     current_pending_status = [PENDING_STATUS]
     parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
     parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
                         help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
+    parser.add_argument("--debug_repo", action="store_true", help="Use debug repo")
     return parser.parse_args()
     args = get_args()
     local_debug = args.debug
     # debug specific task by ping
+    if local_debug and not args.debug_repo:
         # debug_model_names = [args.model]  # Use model from arguments
         # debug_task_name = [args.task]  # Use task from arguments
         debug_model_names = args.model.split(",")
                     results = process_evaluation(task, eval_request, limit=args.limit)
                     # except Exception as e:
                     #     print(f"debug running error: {e}")
+    elif local_debug and args.debug_repo:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+        RESULTS_REPO = DEBUG_RESULTS_REPO
         while True:
             res = False
             # if random.randint(0, 10) == 0:
             res = process_pending_requests()
             print(f"waiting for 60 seconds")
             time.sleep(60)
             # if res is False:
             #     if random.randint(0, 5) == 0:
             #         res = maybe_refresh_results(100)
             #     else:
             #         res = process_finished_requests(100)
             # time.sleep(60)
             # if res is False:
             #     if random.randint(0, 5) == 0:
             #         res = maybe_refresh_results(0)
             #     else:
             #         res = process_finished_requests(0)
+    elif not local_debug and not args.debug_repo:
+        while True:
+           res = False
+           # if random.randint(0, 10) == 0:
+           res = process_pending_requests()
+           print(f"waiting for 60 seconds")
+           time.sleep(60)
+           # if res is False:
+           #     if random.randint(0, 5) == 0:
+           #         res = maybe_refresh_results(100)
+           #     else:
+           #         res = process_finished_requests(100)
+           # time.sleep(60)
+           # if res is False:
+           #     if random.randint(0, 5) == 0:
+           #         res = maybe_refresh_results(0)
+           #     else:
+           #         res = process_finished_requests(0)
+    else:
+        raise Exception("Cannot use debug_repo without local debug flag")

src/backend/envs.py CHANGED Viewed

@@ -57,7 +57,7 @@ class Tasks(Enum):
     # task20 = Task("race", "acc", "RACE", 0)
     task21 = Task("mmlu", "acc", "MMLU", 5)
-    task22 = Task("gsm8k", "exact_match", "GSM8K", 5)
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")

     # task20 = Task("race", "acc", "RACE", 0)
     task21 = Task("mmlu", "acc", "MMLU", 5)
+    task22 = Task("gsm8k", "em", "GSM8K", 5)
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")

src/display/utils.py CHANGED Viewed

@@ -75,6 +75,7 @@ class Tasks(Enum):
     # # XXX include me back at some point
     selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
     mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
 # These classes are for user facing column names,

     # # XXX include me back at some point
     selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
     mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
+    gsm8k = Task("gsm8k", "em", "GSM8K") #GSM8K/EM (5-shot)
 # These classes are for user facing column names,

src/submission/check_validity.py CHANGED Viewed

@@ -130,7 +130,8 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                     continue
                 with open(os.path.join(root, file), "r") as f:
                     info = json.load(f)
-                    file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}_{info['gpu_type']}")
                     # Select organisation
                     if info["model"].count("/") == 0 or "submitted_time" not in info:

                     continue
                 with open(os.path.join(root, file), "r") as f:
                     info = json.load(f)
+                    if not info["status"] == "FINISHED" and not info["status"] == "RUNNING":
+                        file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}_{info['gpu_type']}")
                     # Select organisation
                     if info["model"].count("/") == 0 or "submitted_time" not in info:

src/utils.py CHANGED Viewed

@@ -3,12 +3,48 @@ from huggingface_hub import snapshot_download
 import subprocess
 import re
 import os
 try:
     from src.display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
 except:
     print("local debug: from display.utils")
     from display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
 def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
     for i in range(10):
@@ -56,7 +92,7 @@ def parse_nvidia_smi():
     gpu_stats = []
     gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
-    gpu_name_pattern = re.compile(r'NVIDIA\s+([\w\s]+?\d+GB)')
     gpu_name = ""
     for index in gpu_indices:
@@ -131,5 +167,70 @@ def analyze_gpu_stats(stats_list):
     return avg_stats
 if __name__ == "__main__":
     print(analyze_gpu_stats(parse_nvidia_smi()))

 import subprocess
 import re
 import os
+import GPUtil
 try:
     from src.display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
 except:
     print("local debug: from display.utils")
     from display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
# GPU models covered by the lookup tables below. The order of this tuple fixes
# the column order of every value tuple that is zipped against it.
_GPU_MODELS = (
    "NVIDIA-A100-PCIe-80GB",
    "NVIDIA-A100-SXM-80GB",
    "NVIDIA-H100-PCIe-80GB",
    "NVIDIA-RTX-A5000-24GB",
)

# Peak memory bandwidth per GPU model.
# NOTE(review): values look like GB/s -- confirm the unit with the consumer.
MEM_BW_DICT = dict(zip(_GPU_MODELS, (1935, 2039, 2039, 768)))

# Peak compute throughput, keyed first by precision and then by GPU model.
# NOTE(review): values look like FLOP/s -- confirm the unit with the consumer.
PEAK_FLOPS_DICT = {
    "float32": dict(zip(_GPU_MODELS, (312e12, 312e12, 756e12, 222.2e12))),
    "float16": dict(zip(_GPU_MODELS, (624e12, 624e12, 1513e12, 444.4e12))),
    "8bit": dict(zip(_GPU_MODELS, (1248e12, 1248e12, 3026e12, 889e12))),
    "4bit": dict(zip(_GPU_MODELS, (2496e12, 2496e12, 6052e12, 1778e12))),
}
 def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
     for i in range(10):
     gpu_stats = []
     gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
+    gpu_name_pattern = re.compile(r'NVIDIA\s+([\w\s]+\d+(?:\s*GB)?)')
     gpu_name = ""
     for index in gpu_indices:
     return avg_stats
def get_gpu_number():
    """Count the GPUs for which ``nvidia-smi`` prints a usage line.

    Candidate devices come from ``CUDA_VISIBLE_DEVICES`` when that variable
    is set; otherwise they are listed with
    ``nvidia-smi --query-gpu=index``. Each candidate is probed with
    ``nvidia-smi -i <index>`` and every output line that matches the
    temperature / power / memory / utilisation pattern counts as one GPU.

    Returns:
        int: Number of matching status lines. ``0`` when no device is
        visible or when the index query fails (the failure is also printed).
    """
    visible_devices = os.getenv('CUDA_VISIBLE_DEVICES')
    if visible_devices is not None:
        raw_indices = visible_devices.split(',')
    else:
        # Query all GPU indices if CUDA_VISIBLE_DEVICES is not set
        query = subprocess.run(
            ['nvidia-smi', '--query-gpu=index', '--format=csv,noheader'],
            capture_output=True,
            text=True,
        )
        if query.returncode != 0:
            print("Failed to query GPU indices.")
            # Return an int on every path so callers can do arithmetic on it.
            return 0
        raw_indices = query.stdout.strip().split('\n')

    # Drop blank entries: CUDA_VISIBLE_DEVICES="" means "no devices visible",
    # and an empty index must not be passed to ``nvidia-smi -i``.
    gpu_indices = [entry.strip() for entry in raw_indices if entry.strip()]

    gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
    gpu_count = 0
    for index in gpu_indices:
        probe = subprocess.run(['nvidia-smi', '-i', index], capture_output=True, text=True)
        # Only the number of status lines matters, so the parsed values are
        # not collected.
        gpu_count += sum(
            1
            for line in probe.stdout.strip().split("\n")
            if gpu_info_pattern.search(line)
        )
    return gpu_count
def get_gpu_details():
    """Return a ``<name>-<N>GB`` label for the first GPU listed by GPUtil.

    Spaces in the device name are replaced by hyphens, and the total memory
    is converted from MB to GB and rounded to the nearest whole number.
    """
    first_gpu = GPUtil.getGPUs()[0]
    hyphenated_name = "-".join(first_gpu.name.split(" "))
    total_gb = round(first_gpu.memoryTotal / 1024)
    return f"{hyphenated_name}-{total_gb}GB"
def get_peak_bw(gpu_name):
    """Look up the peak memory bandwidth recorded for ``gpu_name``.

    Args:
        gpu_name: GPU model label used as a key of ``MEM_BW_DICT``.

    Returns:
        The value stored in ``MEM_BW_DICT`` for that model.

    Raises:
        KeyError: If ``gpu_name`` is not a key of ``MEM_BW_DICT``; the
            message lists the supported labels.
    """
    try:
        return MEM_BW_DICT[gpu_name]
    except KeyError:
        # Same exception type as the bare lookup, but say what is accepted.
        raise KeyError(
            f"Unknown GPU type {gpu_name!r}; supported: {sorted(MEM_BW_DICT)}"
        ) from None
def get_peak_flops(gpu_name, precision):
    """Look up the peak throughput recorded for a GPU model and precision.

    Args:
        gpu_name: GPU model label, a key of the per-precision tables.
        precision: Precision label, a top-level key of ``PEAK_FLOPS_DICT``.

    Returns:
        The value stored in ``PEAK_FLOPS_DICT[precision][gpu_name]``.

    Raises:
        KeyError: If either ``precision`` or ``gpu_name`` is not present;
            the message names the failing key and the supported ones.
    """
    try:
        per_gpu = PEAK_FLOPS_DICT[precision]
    except KeyError:
        raise KeyError(
            f"Unknown precision {precision!r}; supported: {sorted(PEAK_FLOPS_DICT)}"
        ) from None
    try:
        return per_gpu[gpu_name]
    except KeyError:
        raise KeyError(
            f"Unknown GPU type {gpu_name!r} for precision {precision!r}; "
            f"supported: {sorted(per_gpu)}"
        ) from None
def transfer_precision2bytes(precision):
    """Map a precision label to the number of bytes one value occupies.

    Args:
        precision: One of ``"float32"``, ``"float16"``, ``"8bit"``, ``"4bit"``.

    Returns:
        ``4``, ``2``, ``1`` or ``0.5`` respectively.

    Raises:
        ValueError: If ``precision`` is none of the labels above.
    """
    byte_widths = (
        ("float32", 4),
        ("float16", 2),
        ("8bit", 1),
        ("4bit", 0.5),
    )
    # Compare with == (not a dict lookup) so unhashable inputs still end in
    # ValueError rather than TypeError.
    for label, width in byte_widths:
        if precision == label:
            return width
    raise ValueError(f"Unsupported precision: {precision}")
 if __name__ == "__main__":
     print(analyze_gpu_stats(parse_nvidia_smi()))