Add choice for device, and verify in backend. Add debug mode (#18)
- Add app debug mode and dynamic refresh tables (2a18e0ad941b517867200352ba49273da53f5907)
- Merge branch 'main' into pr/15 (a4829c27aeca63dc5327ec3a4287eb66bb2cbde8)
- Add inference_framework to the queue column (86b14ca786017b5479b54e4402226f7597295729)
- Add requirements (f5ff85d527bfa186b8e105d5637ac4e3793a9721)
- Merge branch 'main' into pr/15 (08b56fc73f03f150ef1baa35f78e762dcbf83fd5)
- Merge branch 'pr/15' into pr/18 (b2a2a5bae92f4b80223988e2059a69dfac7caaa8)
- Add GPU types (60d9c33965a34f63d2026b722afa33c03fe48306)
- Delete requests (22ce8a7836b70c1849ec4aeb77be3fce2642bcab)
- add choices for GPU and Solve leaderboard issue (bc48941fdfee36d8d1510a96b2969daa5d1ebf3a)
- fix a bug (6e99f9d4535fd801ae6b675ef2d833cc109e9d74)
- Apply GPU type verification on backend debug mode (dbe8db4df45ec9d75a8ce5abd46b77ff2e7627b7)
- Fix a bug (0fb715c8b89cef41ec9497c09b6ad8db47f65d78)
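
Files changed: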
- app.py +83 -16
- backend-cli.py +33 -4
- requirements.txt +1 -0
- src/backend/manage_requests.py +1 -0
- src/display/utils.py +19 -0
- src/envs.py +2 -2
- src/populate.py +2 -0
- src/submission/check_validity.py +1 -1
- src/submission/submit.py +9 -3
app.py

@@ -2,10 +2,11 @@
 import os
 import datetime
 import socket
+from threading import Thread
 
 import gradio as gr
 import pandas as pd
-
+import time
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from huggingface_hub import snapshot_download
@@ -35,13 +36,27 @@ from src.display.utils import (
     fields,
     WeightType,
     Precision,
+    GPUType
 )
 
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, 
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, \
+    QUEUE_REPO, REPO_ID, RESULTS_REPO, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.utils import get_dataset_summary_table
 
+def get_args():
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
+    parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    return parser.parse_args()
+
+args = get_args()
+if args.debug:
+    print("Running in debug mode")
+    QUEUE_REPO = DEBUG_QUEUE_REPO
+    RESULTS_REPO = DEBUG_RESULTS_REPO
 
 def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
     try:
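
A note on the debug switch just added above: `QUEUE_REPO = DEBUG_QUEUE_REPO` rebinds only the name imported into app.py. Modules that import the constant from src.envs themselves, such as src/submission/submit.py, still see the production value, which is presumably why this same commit also threads an explicit debug argument into add_new_eval (see the submit.py diff below). A self-contained sketch of that import semantics (stand-in names, not code from the commit):

import types

envs = types.ModuleType("envs")      # stand-in for src.envs
envs.QUEUE_REPO = "org/requests"

QUEUE_REPO = envs.QUEUE_REPO         # like `from src.envs import QUEUE_REPO`
QUEUE_REPO = "org/debug_requests"    # app.py-style rebind after parsing --debug

print(QUEUE_REPO)       # "org/debug_requests": this module sees the debug repo
print(envs.QUEUE_REPO)  # "org/requests": other importers still see production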
@@ -75,11 +90,6 @@ def init_space():
     )
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
-
-dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
-leaderboard_df = original_df.copy()
-
-
 # Searching and filtering
 def update_table(
     hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str
@@ -142,6 +152,51 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
 
     return filtered_df
 
+shown_columns = None
+dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+leaderboard_df = original_df.copy()
+
+# def update_leaderboard_table():
+#     global leaderboard_df, shown_columns
+#     print("Updating leaderboard table")
+#     return leaderboard_df[
+#                 [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+#                 + shown_columns.value
+#                 + [AutoEvalColumn.dummy.name]
+#             ] if not leaderboard_df.empty else leaderboard_df
+
+
+# def update_hidden_leaderboard_table():
+#     global original_df
+#     return original_df[COLS] if original_df.empty is False else original_df
+
+# def update_dataset_table():
+#     global dataset_df
+#     return dataset_df
+
+# def update_finish_table():
+#     global finished_eval_queue_df
+#     return finished_eval_queue_df
+
+# def update_running_table():
+#     global running_eval_queue_df
+#     return running_eval_queue_df
+
+# def update_pending_table():
+#     global pending_eval_queue_df
+#     return pending_eval_queue_df
+
+# def update_finish_num():
+#     global finished_eval_queue_df
+#     return len(finished_eval_queue_df)
+
+# def update_running_num():
+#     global running_eval_queue_df
+#     return len(running_eval_queue_df)
+
+# def update_pending_num():
+#     global pending_eval_queue_df
+#     return len(pending_eval_queue_df)
 
 # triggered only once at startup => read query parameter if it exists
 def load_query(request: gr.Request):
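
The commented-out update_* helpers are scaffolding for the "dynamic refresh tables" part of the commit message: each one re-reads a module-level dataframe so a UI component can poll it. If they are revived, one plausible wiring is Gradio's polling support; a minimal sketch, under the assumption that the installed Gradio version supports the `every=` keyword on demo.load:

import gradio as gr

pending_eval_queue_df = []   # stand-in for the dataframe produced by init_space()

def update_pending_num():
    return len(pending_eval_queue_df)

with gr.Blocks() as demo:
    pending_num = gr.Number(label="Pending evals")
    # re-poll the server every 60 s so the counter tracks the live queue
    demo.load(update_pending_num, inputs=None, outputs=pending_num, every=60)

demo.queue().launch()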
@@ -162,7 +217,7 @@ with demo:
                     search_bar = gr.Textbox(
                         placeholder=" 🔍 Model search (separate multiple queries with `;`)",
                         show_label=False,
-                        elem_id="search-bar"
+                        elem_id="search-bar"
                     )
                 with gr.Row():
                     shown_columns = gr.CheckboxGroup(
@@ -251,14 +306,14 @@ with demo:
                 filter_columns_size,
                 search_bar,
             ],
-            leaderboard_table
+            leaderboard_table
         )
 
         # Check query parameter once at startup and update search bar
         demo.load(load_query, inputs=[], outputs=[search_bar])
 
         for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
-            selector.
+            selector.select(
                 update_table,
                 [
                     hidden_leaderboard_table_for_search,
@@ -323,6 +378,15 @@ with demo:
                 value=None,
                 interactive=True,
             )
+
+            gpu_type = gr.Dropdown(
+                choices=[t.to_str() for t in GPUType],
+                label="GPU type",
+                multiselect=False,
+                value="NVIDIA-A100-PCIe-80GB",
+                interactive=True,
+            )
+
 
         with gr.Row():
             with gr.Column():
@@ -358,6 +422,7 @@ with demo:
 
         submit_button = gr.Button("Submit Eval")
         submission_result = gr.Markdown()
+        debug = gr.Checkbox(value=args.debug, label="Debug", visible=False)
         submit_button.click(
             add_new_eval,
             [
@@ -369,6 +434,8 @@ with demo:
                 weight_type,
                 model_type,
                 inference_framework,
+                debug,
+                gpu_type
             ],
             submission_result,
         )
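
The hidden `debug` checkbox added above is worth a remark: gr.Checkbox(value=args.debug, visible=False) is a standard Gradio trick for feeding a server-side constant into a click callback without exposing it in the UI. A standalone illustration (hypothetical handler, not from this commit):

import gradio as gr

def submit(model, debug):
    return f"queued {model}" + (" [debug queue]" if debug else "")

with gr.Blocks() as demo:
    model = gr.Textbox(label="Model")
    debug = gr.Checkbox(value=True, visible=False)  # constant input, hidden from users
    result = gr.Markdown()
    gr.Button("Submit").click(submit, [model, debug], result)

demo.launch()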
@@ -385,8 +452,7 @@ with demo:
 
 scheduler = BackgroundScheduler()
 
-scheduler.add_job(restart_space, "interval", 
-
+scheduler.add_job(restart_space, "interval", hours=6)
 
 def launch_backend():
     import subprocess
@@ -395,8 +461,9 @@ def launch_backend():
     if DEVICE not in {"cpu"}:
         _ = subprocess.run(["python", "backend-cli.py"])
 
-
+# Thread(target=periodic_init, daemon=True).start()
 # scheduler.add_job(launch_backend, "interval", seconds=120)
-
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+if __name__ == "__main__":
+    scheduler.start()
+    demo.queue(default_concurrency_limit=40).launch()
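
Two behavioral notes on this last hunk: the restart job is now pinned at hours=6, and scheduler.start() plus demo.queue(...).launch() moved under an `if __name__ == "__main__":` guard, so importing app.py (for tests or tooling) no longer boots the scheduler and server. Presumably the debug workflow is then run locally as, for example:

python app.py --debug

which, per the block added after the imports, reroutes QUEUE_REPO and RESULTS_REPO to the debug_* repos defined in src/envs.py.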
backend-cli.py

@@ -16,13 +16,13 @@ from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PAT
 from src.backend.manage_requests import EvalRequest
 from src.leaderboard.read_evals import EvalResult
 
-from src.envs import QUEUE_REPO, RESULTS_REPO, API
+from src.envs import QUEUE_REPO, RESULTS_REPO, API, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.utils import my_snapshot_download, analyze_gpu_stats, parse_nvidia_smi, monitor_gpus
 
 from src.leaderboard.read_evals import get_raw_eval_results
 
 from typing import Optional
-
+import GPUtil
 import time
 
 import pprint
@@ -126,6 +126,9 @@ def request_to_result_name(request: EvalRequest) -> str:
 def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int] = None) -> dict:
     batch_size = 1
     batch_size = eval_request.batch_size
+
+    if args.debug:
+        RESULTS_REPO = DEBUG_RESULTS_REPO
 
     init_gpu_info = analyze_gpu_stats(parse_nvidia_smi())
     # if init_gpu_info['Mem(M)'] > 500:
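
A review caveat on the two new debug branches in this file: assigning RESULTS_REPO (here) or QUEUE_REPO (in process_pending_requests below) inside a function makes that name local to the whole function in Python. process_pending_requests reads QUEUE_REPO later in the same body (the my_snapshot_download call in a later hunk), so the non-debug path looks like it would raise UnboundLocalError, and the debug value never reaches other modules either. A minimal repro with stand-in names:

REPO = "org/results"

def run(debug: bool):
    if debug:
        REPO = "org/debug_results"  # this assignment makes REPO local to run()
    return REPO                     # reads the *local* REPO, never the global

print(run(True))                    # "org/debug_results"
try:
    run(False)                      # local REPO was never assigned on this path
except UnboundLocalError as e:
    print(e)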
@@ -364,9 +367,22 @@ def maybe_refresh_results(thr: int, hard_task_lst: Optional[list[str]] = None) -
     return False
 
 
+def get_gpu_details():
+    gpus = GPUtil.getGPUs()
+    gpu = gpus[0]
+    name = gpu.name.replace(" ", "-")
+    # Convert memory from MB to GB and round to nearest whole number
+    memory_gb = round(gpu.memoryTotal / 1024)
+    memory = f"{memory_gb}GB"
+    formatted_name = f"{name}-{memory}"
+    return formatted_name
+
 def process_pending_requests() -> bool:
+    if args.debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     sanity_checks()
-
+    print("Processing pending requests")
     current_pending_status = [PENDING_STATUS]
 
     # Get all eval request that are PENDING, if you want to run other evals, change this parameter
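
For context on the new get_gpu_details() helper above: GPUtil.getGPUs() returns one object per visible device, with a space-separated name and memoryTotal in megabytes, so the helper normalizes the first device to the dashed strings used by the GPUType choices. A condensed equivalent (printed values are illustrative):

import GPUtil

gpu = GPUtil.getGPUs()[0]                      # assumes at least one visible GPU
name = gpu.name.replace(" ", "-")              # e.g. "NVIDIA A100-PCIe" -> "NVIDIA-A100-PCIe"
memory = f"{round(gpu.memoryTotal / 1024)}GB"  # memoryTotal is in MB; 81920 -> "80GB"
print(f"{name}-{memory}")                      # e.g. "NVIDIA-A100-PCIe-80GB"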
@@ -385,6 +401,12 @@ def process_pending_requests() -> bool:
 
     eval_request = eval_requests[0]
     pp.pprint(eval_request)
+
+    gpu_type = eval_request.gpu_type
+    curr_gpu_type = get_gpu_details()
+    if gpu_type != curr_gpu_type:
+        print(f"GPU type mismatch: {gpu_type} vs {curr_gpu_type}")
+        return False
 
     my_snapshot_download(
         repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60
@@ -426,6 +448,8 @@ def get_args():
     parser.add_argument("--precision", type=str, default="float32,float16,8bit,4bit", help="Precision to debug")
     parser.add_argument("--inference-framework", type=str, default="hf-chat", help="Inference framework to debug")
     parser.add_argument("--limit", type=int, default=None, help="Limit for the number of samples")
+    parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
+                        help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
     return parser.parse_args()
 
 
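
With the new argument, a backend worker can be pinned to the card it actually runs on; assuming the pre-existing --debug flag that args.debug refers to, a debug invocation would look like:

python backend-cli.py --debug --gpu-type NVIDIA-RTX-A5000-24GB

A request whose recorded gpu_type differs from the detected card is then skipped (process_pending_requests returns False above) or, in the debug path further down, raises a "GPU type mismatch" exception.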
@@ -454,8 +478,13 @@ if __name__ == "__main__":
                        status="",
                        json_filepath="",
                        precision=precision,  # Use precision from arguments
-                        inference_framework=args.inference_framework  # Use inference framework from arguments
+                        inference_framework=args.inference_framework,  # Use inference framework from arguments
+                        gpu_type=args.gpu_type
                    )
+                    curr_gpu_type = get_gpu_details()
+                    if eval_request.gpu_type != curr_gpu_type:
+                        print(f"GPU type mismatch: {eval_request.gpu_type} vs {curr_gpu_type}")
+                        raise Exception("GPU type mismatch")
                    results = process_evaluation(task, eval_request, limit=args.limit)
                except Exception as e:
                    print(f"debug running error: {e}")
requirements.txt

@@ -30,3 +30,4 @@ evaluate
 spacy
 selfcheckgpt
 immutabledict
+gputil
src/backend/manage_requests.py

@@ -28,6 +28,7 @@ class EvalRequest:
     params: Optional[int] = None
     license: Optional[str] = ""
     batch_size: Optional[int] = 1
+    gpu_type: Optional[str] = "NVIDIA-A100-PCIe-80GB"
 
     def get_model_args(self) -> str:
         model_args = f"pretrained={self.model},revision={self.revision},parallelize=True"  # ,max_length=4096"
src/display/utils.py

@@ -140,6 +140,7 @@ class EvalQueueColumn:  # Queue column
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
     weight_type = ColumnContent("weight_type", "str", "Original")
+    model_framework = ColumnContent("inference_framework", "str", True)
     status = ColumnContent("status", "str", True)
 
 
@@ -189,7 +190,25 @@ class InferenceFramework(Enum):
             return InferenceFramework.HF_Chat
         return InferenceFramework.Unknown
 
+class GPUType(Enum):
+    H100_pcie = ModelDetails("NVIDIA-H100-PCIe-80GB")
+    A100_pcie = ModelDetails("NVIDIA-A100-PCIe-80GB")
+    A5000 = ModelDetails("NVIDIA-RTX-A5000-24GB")
+    Unknown = ModelDetails("?")
+
+    def to_str(self):
+        return self.value.name
+
+    @staticmethod
+    def from_str(gpu_type: str):
+        if gpu_type in ["NVIDIA-H100-PCIe-80GB"]:
+            return GPUType.A100_pcie
+        if gpu_type in ["NVIDIA-A100-PCIe-80GB"]:
+            return GPUType.H100_pcie
+        if gpu_type in ["NVIDIA-A5000-24GB"]:
+            return GPUType.A5000
+        return GPUType.Unknown
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
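
A review note on GPUType.from_str as committed: the mappings look swapped (the H100 string returns A100_pcie and vice versa), and "NVIDIA-A5000-24GB" is missing the "RTX" infix that to_str produces for GPUType.A5000, so A5000 round-trips to Unknown. Assuming the intent is a to_str/from_str round-trip, a corrected sketch:

@staticmethod
def from_str(gpu_type: str):
    # round-trip on the enum's own display names instead of hand-written branches
    for member in GPUType:
        if member.value.name == gpu_type:
            return member
    return GPUType.Unknown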
src/envs.py

@@ -12,8 +12,8 @@ QUEUE_REPO = "sparse-generative-ai/requests"
 QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
 RESULTS_REPO = "sparse-generative-ai/results"
 
-
-
+DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
+DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"
 
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
 
src/populate.py

@@ -95,6 +95,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+            data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
 
             all_evals.append(data)
         elif ".md" not in entry:
@@ -107,6 +108,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+                data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
                 all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
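
Using data.get("inference_framework", "-") rather than direct indexing matters here: request JSONs written before this commit carry no inference_framework key, so the default keeps old queue entries rendering as "-" instead of raising KeyError. For example:

old_entry = {"model": "org/model", "revision": "main"}  # pre-commit request file
print(old_entry.get("inference_framework", "-"))        # "-" rather than KeyError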
src/submission/check_validity.py

@@ -130,7 +130,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                 continue
             with open(os.path.join(root, file), "r") as f:
                 info = json.load(f)
-                file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}")
+                file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}_{info['gpu_type']}")
 
                 # Select organisation
                 if info["model"].count("/") == 0 or "submitted_time" not in info:
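
Unlike populate.py above, this line indexes info['gpu_type'] directly, so request files written before this commit (which lack the key) would raise KeyError while the dedup index is built. Mirroring the defensive .get pattern used in populate.py would avoid that, e.g.:

file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}_{info['inference_framework']}_{info.get('gpu_type', '-')}")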
src/submission/submit.py

@@ -3,7 +3,7 @@ import os
 from datetime import datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, DEBUG_QUEUE_REPO
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
     already_submitted_models,
@@ -26,12 +26,17 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
     inference_framework: str,
+    debug: bool = False,
+    gpu_type: str = "NVIDIA-A100-PCIe-80GB",
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
 
+    if debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     user_name = ""
     model_path = model
     if "/" in model:
@@ -110,17 +115,18 @@ def add_new_eval(
         "params": model_size,
         "license": license,
         "inference_framework": inference_framework,
+        "gpu_type": gpu_type
     }
 
     # Check for duplicate submission
-    if f"{model}_{revision}_{precision}_{inference_framework}" in REQUESTED_MODELS:
+    if f"{model}_{revision}_{precision}_{inference_framework}_{gpu_type}" in REQUESTED_MODELS:
         return styled_warning("This model has been already submitted.")
 
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
     # out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}.json"
+    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}_{gpu_type}.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
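
The same local-scoping caveat noted for backend-cli.py applies to the `if debug: QUEUE_REPO = DEBUG_QUEUE_REPO` branch here: if add_new_eval reads QUEUE_REPO anywhere later in its body (for instance when uploading the request file), the non-debug call path would raise UnboundLocalError, since the conditional assignment makes the name function-local. Something like `queue_repo = DEBUG_QUEUE_REPO if debug else QUEUE_REPO` would avoid both the error and the shadowing.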