nandometzger committed
Commit 9973b4a
Parent(s): a4b6cb2

add find bs func2

pipeline.py  +26 -0

pipeline.py CHANGED
@@ -1820,6 +1820,32 @@ def ensemble_depth(
     return depth, uncertainty  # [1,1,H,W], [1,1,H,W]
 
 
+# Search table for suggested max. inference batch size
+bs_search_table = [
+    # tested on A100-PCIE-80GB
+    {"res": 768, "total_vram": 79, "bs": 35, "dtype": torch.float32},
+    {"res": 1024, "total_vram": 79, "bs": 20, "dtype": torch.float32},
+    # tested on A100-PCIE-40GB
+    {"res": 768, "total_vram": 39, "bs": 15, "dtype": torch.float32},
+    {"res": 1024, "total_vram": 39, "bs": 8, "dtype": torch.float32},
+    {"res": 768, "total_vram": 39, "bs": 30, "dtype": torch.float16},
+    {"res": 1024, "total_vram": 39, "bs": 15, "dtype": torch.float16},
+    # tested on RTX3090, RTX4090
+    {"res": 512, "total_vram": 23, "bs": 20, "dtype": torch.float32},
+    {"res": 768, "total_vram": 23, "bs": 7, "dtype": torch.float32},
+    {"res": 1024, "total_vram": 23, "bs": 3, "dtype": torch.float32},
+    {"res": 512, "total_vram": 23, "bs": 40, "dtype": torch.float16},
+    {"res": 768, "total_vram": 23, "bs": 18, "dtype": torch.float16},
+    {"res": 1024, "total_vram": 23, "bs": 10, "dtype": torch.float16},
+    # tested on GTX1080Ti
+    {"res": 512, "total_vram": 10, "bs": 5, "dtype": torch.float32},
+    {"res": 768, "total_vram": 10, "bs": 2, "dtype": torch.float32},
+    {"res": 512, "total_vram": 10, "bs": 10, "dtype": torch.float16},
+    {"res": 768, "total_vram": 10, "bs": 5, "dtype": torch.float16},
+    {"res": 1024, "total_vram": 10, "bs": 3, "dtype": torch.float16},
+]
+
+
 def find_batch_size(ensemble_size: int, input_res: int, dtype: torch.dtype) -> int:
     """
     Automatically search for suitable operating batch size.
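The hunk is truncated before the body of find_batch_size. For context, here is a minimal sketch of how a lookup against bs_search_table could work; the entry-selection order, the torch.cuda.mem_get_info VRAM query, and the ensemble_size cap are assumptions for illustration, not the committed implementation.

import torch

def find_batch_size(ensemble_size: int, input_res: int, dtype: torch.dtype) -> int:
    # Sketch only: assumes the bs_search_table added in the hunk above.
    if not torch.cuda.is_available():
        return 1  # no table entry applies on CPU

    # torch.cuda.mem_get_info() returns (free, total) in bytes; use total GiB.
    total_vram = torch.cuda.mem_get_info()[1] / 1024.0**3

    # Only entries measured with the requested dtype are comparable.
    candidates = [s for s in bs_search_table if s["dtype"] == dtype]

    # Try the smallest tested resolution that covers input_res; among equal
    # resolutions, prefer the largest-VRAM measurement this card satisfies.
    for entry in sorted(candidates, key=lambda s: (s["res"], -s["total_vram"])):
        if input_res <= entry["res"] and total_vram >= entry["total_vram"]:
            return min(entry["bs"], ensemble_size)  # never exceed the ensemble

    return 1  # conservative fallback: no tested configuration matches

On a 24 GB card with float16 at 768 px, for instance, this lookup would land on the RTX3090/RTX4090 row and return 18, further capped by ensemble_size.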