meg HF staff committed on
Commit
55d0c85
·
verified ·
1 Parent(s): bb6f5b0

Dummy support for more regions/vendors (anything other than AWS us-east-1 logs a warning and falls back to AWS us-east-1).

Browse files
src/backend/compute_memory_requirements.py CHANGED
@@ -4,30 +4,36 @@ from src.logging import setup_logger
4
  logger = setup_logger(__name__)
5
 
6
 
7
- def get_instance_needs(model_name: str, access_token: str):
8
  """Scales up compute based on size and price."""
9
  needed_space = get_size(model_name, access_token)
10
  if needed_space:
11
- if needed_space < 20:
12
- # Cheapest
13
- return 'x1', 'nvidia-a10g'
14
- elif needed_space < 60:
15
- return 'x4', 'nvidia-t4'
16
- elif needed_space < 80:
17
- return 'x1', 'nvidia-a100'
18
- elif needed_space < 95:
19
- return 'x4', 'nvidia-a10g'
20
- elif needed_space < 150:
21
- return 'x2', 'nvidia-a100'
22
- # Not doing any higher (for now) as that would start costing a lot.
 
 
 
 
 
 
23
  else:
24
  # A default size to start trying to scale up from.
25
  return 'x4', 'nvidia-l4'
26
 
27
 
28
  # Code based in part on https://huggingface.co/spaces/hf-accelerate/model-memory-usage
29
- def get_size(model_name: str, access_token: str, library="auto",
30
- dtype="float32"):
31
  """
32
  This is just to get a size estimate of the model.
33
  Assuming dtype float32, which isn't always true.
@@ -54,6 +60,6 @@ if __name__ == '__main__':
54
  # Debugging here
55
  import os
56
 
57
- num_gigs_debug = get_size("upstage/SOLAR-10.7B-v1.0",
58
- access_token=os.environ.get("HF_TOKEN"))
59
  print(num_gigs_debug)
 
4
  logger = setup_logger(__name__)
5
 
6
 
7
+ def get_instance_needs(model_name: str, access_token: str, region='us-east-1', vendor='aws'):
8
  """Scales up compute based on size and price."""
9
  needed_space = get_size(model_name, access_token)
10
  if needed_space:
11
+ # AWS is the only thing I've implemented this for for now.
12
+ if region =='us-east-1' and vendor == 'aws':
13
+ if needed_space < 20:
14
+ # Cheapest
15
+ return 'x1', 'nvidia-a10g'
16
+ elif needed_space < 60:
17
+ return 'x4', 'nvidia-t4'
18
+ elif needed_space < 80:
19
+ return 'x1', 'nvidia-a100'
20
+ elif needed_space < 95:
21
+ return 'x4', 'nvidia-a10g'
22
+ elif needed_space < 150:
23
+ return 'x2', 'nvidia-a100'
24
+ # Not doing any higher (for now) as that would start costing a lot.
25
+ else:
26
+ logger.warning("Not implemented for region %s vendor %s" % (region, vendor))
27
+ logger.warning("Only implemented for aws us-east-1. Pretending that's what you asked for.")
28
+ return get_instance_needs(model_name=model_name, access_token=access_token)
29
  else:
30
  # A default size to start trying to scale up from.
31
  return 'x4', 'nvidia-l4'
32
 
33
 
34
  # Code based in part on https://huggingface.co/spaces/hf-accelerate/model-memory-usage
35
+ def get_size(model_name: str, access_token: str, library='auto',
36
+ dtype='float32'):
37
  """
38
  This is just to get a size estimate of the model.
39
  Assuming dtype float32, which isn't always true.
 
60
  # Debugging here
61
  import os
62
 
63
+ num_gigs_debug = get_size('upstage/SOLAR-10.7B-v1.0',
64
+ access_token=os.environ.get('HF_TOKEN'))
65
  print(num_gigs_debug)