Clémentine committed
Commit 6bc96ff
1 Parent(s): 8b88d2c

debug inference endpoint launch and requirements

app.py CHANGED
@@ -19,8 +19,8 @@ This is just a visual for the auto evaluator. Note that the lines of the log vis
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     with gr.Tab("Application"):
         gr.Markdown(intro_md)
-        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
         output = gr.HTML(log_file_to_html_string, every=10)
+        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
     demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
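For reference, the `every=` arguments above rely on Gradio re-running a component's value function on a timer once the queue is enabled; the hidden `dummy` Markdown is what keeps `run_auto_eval` firing periodically. Below is a minimal, self-contained sketch of that polling pattern; the two helper functions and the refresh interval are illustrative stand-ins, not the Space's real code.

```python
import datetime
import gradio as gr

REFRESH_RATE = 5  # seconds; illustrative value, not the Space's setting

def run_auto_eval():
    # stand-in for the real auto-eval trigger; here it just reports the time
    return f"last tick: {datetime.datetime.now().isoformat()}"

def log_file_to_html_string():
    # stand-in for the real log renderer
    return "<pre>log output would appear here</pre>"

with gr.Blocks() as demo:
    with gr.Tab("Application"):
        output = gr.HTML(log_file_to_html_string, every=10)
        # hidden component whose value function re-runs every REFRESH_RATE seconds,
        # which is what keeps the background loop going
        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)

if __name__ == "__main__":
    # `every=` only polls while the queue is running, hence queue() before launch()
    demo.queue().launch()
```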
requirements.txt CHANGED
@@ -18,7 +18,11 @@ git+https://github.com/huggingface/lighteval.git#egg=lighteval
 accelerate==0.24.1
 sentencepiece
 
+# Evaluation suites
+lighteval
+lm_eval
+
 # Log Visualizer
-beautifulsoup4==4.12.2
+BeautifulSoup4==4.12.2
 lxml==4.9.3
 rich==13.3.4
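One note on the BeautifulSoup pin above: pip normalizes distribution names (PEP 503), so `BeautifulSoup4==4.12.2` and `beautifulsoup4==4.12.2` resolve to the same package and the case change is cosmetic. A quick way to confirm the normalization, assuming the `packaging` library is available:

```python
from packaging.utils import canonicalize_name

# Both spellings normalize to the same canonical project name,
# so pip installs the same distribution either way.
assert canonicalize_name("BeautifulSoup4") == canonicalize_name("beautifulsoup4") == "beautifulsoup4"
```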
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -1,5 +1,5 @@
 import json
-import os
+import argparse
 import logging
 from datetime import datetime
 
@@ -16,15 +16,18 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
     if limit:
         logger.info("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 
-    args = {
-        "endpoint_model_name": f"{eval_request.model}_{eval_request.precision}".lower(),
+    args_dict = {
+        # Endpoint parameters
+        "endpoint_model_name": eval_request.model,
         "accelerator": accelerator,
         "vendor": vendor,
         "region": region,
         "instance_size": instance_size,
         "instance_type": instance_type,
-        "max_samples": limit,
-        "job_id": str(datetime.now()),
+        "reuse_existing": False,
+        "model_dtype": eval_request.precision,
+        "revision": eval_request.revision,
+        # Save parameters
         "push_results_to_hub": True,
         "save_details": True,
         "push_details_to_hub": True,
@@ -32,10 +35,22 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         "cache_dir": CACHE_PATH,
         "results_org": RESULTS_REPO,
         "output_dir": local_dir,
+        "job_id": str(datetime.now()),
+        # Experiment parameters
         "override_batch_size": batch_size,
         "custom_tasks": "custom_tasks.py",
-        "tasks": task_names
+        "tasks": task_names,
+        "max_samples": limit,
+        "use_chat_template": False,
+        "system_prompt": None,
+        # Parameters which would be set by the kwargs if actually using argparse
+        "inference_server_address": None,
+        "model_args": None,
+        "num_fewshot_seeds": None,
+        "delta_weights": False,
+        "adapter_weights": False
     }
+    args = argparse.Namespace(**args_dict)
 
     try:
         results = main(args)
@@ -47,7 +62,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         dumped = json.dumps(results, indent=2)
         logger.info(dumped)
     except Exception:  # if eval failed, we force a cleanup
-        env_config = EnvConfig(token=TOKEN, cache_dir=args['cache_dir'])
+        env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
 
         model_config = create_model_config(args=args, accelerator=accelerator)
         model, _ = load_model(config=model_config, env_config=env_config)
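The substance of this change is that the launch arguments are now built as a plain dict and wrapped in `argparse.Namespace`, so that lighteval's `main(args)` and the cleanup path can use attribute access (`args.cache_dir`) exactly as they would with arguments parsed from the CLI. A minimal sketch of that pattern, using illustrative keys and values rather than the real configuration:

```python
import argparse

# Collect the would-be CLI options in a dict first (illustrative values only)...
args_dict = {
    "cache_dir": "/tmp/eval-cache",
    "override_batch_size": 1,
    "max_samples": 10,
}

# ...then convert to a Namespace so downstream code can use attribute access,
# as if the values had come from ArgumentParser.parse_args().
args = argparse.Namespace(**args_dict)

assert args.cache_dir == "/tmp/eval-cache"  # attribute access, not args["cache_dir"]
print(vars(args))  # a Namespace converts back to a dict with vars() if needed
```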