ruili3 committed on
Commit 860c6b0 · 1 Parent(s): 0cd3872

init LaRI demo

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. .gitignore +195 -0
  2. app.py +282 -0
  3. demo.py +136 -0
  4. requirements.txt +19 -0
  5. src/lari/model/__init__.py +2 -0
  6. src/lari/model/blocks.py +209 -0
  7. src/lari/model/dinoseg_model.py +153 -0
  8. src/lari/model/dinov2/__init__.py +6 -0
  9. src/lari/model/dinov2/hub/__init__.py +4 -0
  10. src/lari/model/dinov2/hub/backbones.py +156 -0
  11. src/lari/model/dinov2/hub/utils.py +39 -0
  12. src/lari/model/dinov2/layers/__init__.py +11 -0
  13. src/lari/model/dinov2/layers/attention.py +89 -0
  14. src/lari/model/dinov2/layers/block.py +259 -0
  15. src/lari/model/dinov2/layers/dino_head.py +58 -0
  16. src/lari/model/dinov2/layers/drop_path.py +34 -0
  17. src/lari/model/dinov2/layers/layer_scale.py +27 -0
  18. src/lari/model/dinov2/layers/mlp.py +40 -0
  19. src/lari/model/dinov2/layers/patch_embed.py +88 -0
  20. src/lari/model/dinov2/layers/swiglu_ffn.py +72 -0
  21. src/lari/model/dinov2/models/__init__.py +43 -0
  22. src/lari/model/dinov2/models/vision_transformer.py +396 -0
  23. src/lari/model/dinov2/utils/__init__.py +4 -0
  24. src/lari/model/dinov2/utils/cluster.py +95 -0
  25. src/lari/model/dinov2/utils/config.py +72 -0
  26. src/lari/model/dinov2/utils/dtype.py +37 -0
  27. src/lari/model/dinov2/utils/param_groups.py +103 -0
  28. src/lari/model/dinov2/utils/utils.py +95 -0
  29. src/lari/model/dpt_seg_head.py +158 -0
  30. src/lari/model/heads.py +104 -0
  31. src/lari/model/lari_model.py +177 -0
  32. src/lari/model/utils.py +38 -0
  33. src/lari/utils/__init__.py +0 -0
  34. src/lari/utils/geometry_numpy.py +187 -0
  35. src/lari/utils/geometry_torch.py +221 -0
  36. src/utils/__init__.py +2 -0
  37. src/utils/vis.py +105 -0
  38. src/utils3d/README.md +3 -0
  39. src/utils3d/__init__.py +20 -0
  40. src/utils3d/_helpers.py +35 -0
  41. src/utils3d/_unified/__init__.py +934 -0
  42. src/utils3d/_unified/__init__.pyi +0 -0
  43. src/utils3d/io/__init__.py +3 -0
  44. src/utils3d/io/colmap.py +139 -0
  45. src/utils3d/io/obj.py +146 -0
  46. src/utils3d/io/ply.py +104 -0
  47. src/utils3d/numpy/__init__.py +142 -0
  48. src/utils3d/numpy/_helpers.py +93 -0
  49. src/utils3d/numpy/mesh.py +355 -0
  50. src/utils3d/numpy/quadmesh.py +472 -0
.gitignore ADDED
@@ -0,0 +1,195 @@
1
+ scripts/rendering/blender-*
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+ .vscode/
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
164
+ *.sif
165
+ blender-4.2.5-linux-x64*/
166
+ *.zip
167
+ *.png
168
+ *.jpg
169
+ *.log
170
+ intermediate/
171
+ __pycache__
172
+ *.ply
173
+ *.npy
174
+ *.npz
175
+ *.obj
176
+ *.mtl
177
+ *.json.gz
178
+ dcgm/
179
+ wandb/
180
+ # *.json
181
+
182
+
183
+
184
+ # Exception to add training list
185
+ !lgm_leq20Kpts_simtopo25_train.json.gz
186
+ !lgm_leq20Kpts_simtopo25_test.json.gz
187
+ !lgm_leq20Kpts_train.json.gz
188
+ !lgm_leq20Kpts_train_same_size_wrt_simtopo.json.gz
189
+ *.mp4
190
+ *.gif
191
+ *.glb
192
+ !lgm_leq20Kpts_plus_3Kremain_train.json.gz
193
+ !lgm_leq20Kpts_test_cleaned.json.gz
194
+ !lgm_leq20Kpts_train_cleaned.json.gz
195
+ test_metrics.json
app.py ADDED
@@ -0,0 +1,282 @@
1
+ import argparse
2
+ import gradio
3
+ import torch
4
+ import torch.backends.cudnn as cudnn
5
+ from src.utils.vis import prob_to_mask
6
+ from src.lari.model import LaRIModel, DinoSegModel
7
+ from tools import load_model, process_image, post_process_output, get_masked_depth, save_to_glb, get_point_cloud, removebg_crop
8
+ from huggingface_hub import hf_hub_download
9
+
10
+ parser = argparse.ArgumentParser("Arguments for deploying a LaRI Demo")
11
+
12
+ parser.add_argument(
13
+ "--model_info_pm",
14
+ type=str,
15
+ default="LaRIModel(use_pretrained = 'moge_full', num_output_layer = 5, head_type = 'point')",
16
+ help="Network parameters to load the model",
17
+ )
18
+
19
+ parser.add_argument(
20
+ "--model_info_mask",
21
+ type=str,
22
+ default="DinoSegModel(use_pretrained = 'dinov2', dim_proj = 256, pretrained_path = '', num_output_layer = 4, output_type = 'ray_stop')",
23
+ help="Network parameters to load the model",
24
+ )
25
+
26
+ parser.add_argument(
27
+ "--ckpt_path_pm",
28
+ type=str,
29
+ default="lari_obj_16k_pointmap.pth",
30
+ help="Path to pre-trained weights",
31
+ )
32
+
33
+ parser.add_argument(
34
+ "--ckpt_path_mask",
35
+ type=str,
36
+ default="lari_obj_16k_seg.pth",
37
+ help="Path to pre-trained weights",
38
+ )
39
+
40
+ parser.add_argument(
41
+ "--resolution", type=int, default=512, help="Default model resolution"
42
+ )
43
+ args = parser.parse_args()
44
+
45
+
46
+
47
+ def model_forward(pil_input, layered_id, rembg_checkbox):
48
+ """
49
+ Perform LaRI estimation by:
50
+ 1. image processing
51
+ 2. network forward
52
+ 3. save masked layered depth image
53
+ 4. save point cloud
54
+ """
55
+ if pil_input is None:
56
+ return (None, None, None, None, None, None, None, None, None)
57
+
58
+ if rembg_checkbox:
59
+ pil_input = removebg_crop(pil_input)
60
+
61
+ # Process the input image.
62
+ input_tensor, ori_img_tensor, crop_coords, original_size = process_image(
63
+ pil_input, resolution=args.resolution
64
+ )
65
+ input_tensor = input_tensor.to(device)
66
+
67
+ # Run inference.
68
+ with torch.no_grad():
69
+ # lari map
70
+ pred_dict = model_pm(input_tensor)
71
+ lari_map = -pred_dict["pts3d"].squeeze(
72
+ 0
73
+ ) # Expected output shape: (H_reso, W_reso, L, 3)
74
+ # mask
75
+ if model_mask:
76
+ pred_dict = model_mask(input_tensor)
77
+ assert "seg_prob" in pred_dict
78
+ valid_mask = prob_to_mask(pred_dict["seg_prob"].squeeze(0)) # H W L 1
79
+ else:
80
+ h, w, l, _ = lari_map.shape
81
+ valid_mask = lari_map.new_ones((h, w, l, 1), device=lari_map.device)
82
+
83
+ # crop & resize the output to the original resolution.
84
+ if original_size[0] != args.resolution or original_size[1] != args.resolution:
85
+ lari_map = post_process_output(lari_map, crop_coords, original_size) # H W L 3
86
+ valid_mask = post_process_output(
87
+ valid_mask.float(), crop_coords, original_size
88
+ ).bool() # H W L 1
89
+
90
+ max_n_layer = min(valid_mask.shape[-2], lari_map.shape[-2])
91
+ valid_mask = valid_mask[:, :, :max_n_layer, :]
92
+ lari_map = lari_map[:, :, :max_n_layer, :]
93
+
94
+ curr_layer_id = min(max_n_layer - 1, layered_id - 1)
95
+
96
+ # masked depth list
97
+ depth_image = get_masked_depth(
98
+ lari_map=lari_map, valid_mask=valid_mask, layer_id=curr_layer_id
99
+ )
100
+ # point cloud
101
+ glb_path, ply_path = get_point_cloud(
102
+ lari_map, ori_img_tensor, valid_mask, first_layer_color="pseudo"
103
+ )
104
+
105
+ return (
106
+ depth_image,
107
+ glb_path,
108
+ lari_map,
109
+ valid_mask,
110
+ 0,
111
+ max_n_layer - 1,
112
+ glb_path,
113
+ ply_path,
114
+ pil_input,
115
+ )
116
+
117
+
118
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
119
+ cudnn.benchmark = True
120
+
121
+
122
+ # Download the file
123
+ model_path_pm = hf_hub_download(repo_id="ruili3/LaRI", filename=args.ckpt_path_pm, repo_type="model")
124
+ model_path_mask = hf_hub_download(repo_id="ruili3/LaRI", filename=args.ckpt_path_mask, repo_type="model")
125
+
126
+
127
+ # Load the model with pretrained weights.
128
+ model_pm = load_model(args.model_info_pm, model_path_pm, device)
129
+ model_mask = (
130
+ load_model(args.model_info_mask, model_path_mask, device)
131
+ if args.model_info_mask is not None
132
+ else None
133
+ )
134
+
135
+
136
+ def change_layer(slider_layer_id, lari_map, valid_mask, min_layer_id, max_layer_id):
137
+
138
+ if lari_map is None:
139
+ return
140
+
141
+ slider_layer_id = slider_layer_id - 1
142
+ curr_layer_id = min(slider_layer_id, max_layer_id)
143
+ curr_layer_id = max(curr_layer_id, min_layer_id)
144
+
145
+ # masked depth list
146
+ depth_image = get_masked_depth(
147
+ lari_map=lari_map, valid_mask=valid_mask, layer_id=curr_layer_id
148
+ )
149
+
150
+ return depth_image
151
+
152
+
153
+ def clear_everything():
154
+ return (
155
+ gradio.update(value=None),
156
+ gradio.update(value=None),
157
+ gradio.update(value=None),
158
+ gradio.update(value=None),
159
+ gradio.update(value=None),
160
+ gradio.update(value=None),
161
+ gradio.update(value=None),
162
+ )
163
+
164
+
165
+ with gradio.Blocks(
166
+ css=""".gradio-container {margin: 0 !important; min-width: 100%};""",
167
+ title="LaRI Demo",
168
+ ) as demo:
169
+
170
+ gradio.Markdown(
171
+ "<h1 style='text-align: center;'>LaRI: Layered Ray Intersections for Single-view 3D Geometric Reasoning</h1>",
172
+ elem_id="title",
173
+ )
174
+
175
+ gradio.Markdown(
176
+ """
177
+ This is the official demo of Layered Ray Intersection (<a href="https://ruili3.github.io/lari/index.html" target="_blank" style="color: #2a9d8f;">LaRI</a>). For a quick start, click one of the images in 'Examples' and then click the 'Process' button.
178
+
179
+ You can try your own images with the following steps:
180
+ - Load an image;
181
+ - Click the 'Process' button;
182
+ - Browse the layered depth maps (the z-channel of the resulting LaRI point map) by tuning 'Layer ID'.
183
+
184
+ Note that in '3D Point Cloud', different colors denote different intersection layers, i.e., <b style="color: #FFBD1C;">layer 1</b>, <b style="color: #FB5607;">layer 2</b>, <b style="color: #F15BB5;">layer 3</b>, <b style="color: #8338EC;">layer 4</b>.
185
+ """
186
+ )
187
+
188
+ # , <b style="color: #3A86FF;">layer 5</b>.
189
+ lari_map = gradio.State(None)
190
+ valid_mask = gradio.State(None)
191
+ min_layer_id = gradio.State(None)
192
+ max_layer_id = gradio.State(None)
193
+
194
+ with gradio.Column():
195
+ with gradio.Row(equal_height=True):
196
+ with gradio.Column(scale=1):
197
+ image_input = gradio.Image(
198
+ label="Upload an Image", type="pil", height=350
199
+ )
200
+ with gradio.Row():
201
+ rembg_checkbox = gradio.Checkbox(label="Remove background")
202
+ clear_button = gradio.Button("Clear")
203
+ submit_btn = gradio.Button("Process")
204
+ with gradio.Column(scale=1):
205
+ depth_output = gradio.Image(
206
+ label="LaRI Map at Z-axis (depth)",
207
+ type="pil",
208
+ interactive=False,
209
+ height=300,
210
+ )
211
+ slider_layer_id = gradio.Slider(
212
+ minimum=1,
213
+ maximum=4,
214
+ step=1,
215
+ value=1,
216
+ label="Layer ID",
217
+ interactive=True,
218
+ )
219
+
220
+ with gradio.Row(scale=1):
221
+ outmodel = gradio.Model3D(
222
+ label="3D Point Cloud (Color denotes different layers)",
223
+ interactive=False,
224
+ zoom_speed=0.5,
225
+ pan_speed=0.5,
226
+ height=450,
227
+ )
228
+
229
+ with gradio.Row():
230
+ ply_file_output = gradio.File(label="ply output", elem_classes="small-file")
231
+ glb_file_output = gradio.File(label="glb output", elem_classes="small-file")
232
+
233
+ submit_btn.click(
234
+ fn=model_forward,
235
+ inputs=[image_input, slider_layer_id, rembg_checkbox],
236
+ outputs=[
237
+ depth_output,
238
+ outmodel,
239
+ lari_map,
240
+ valid_mask,
241
+ min_layer_id,
242
+ max_layer_id,
243
+ glb_file_output,
244
+ ply_file_output,
245
+ image_input,
246
+ ],
247
+ )
248
+
249
+ clear_button.click(
250
+ fn=clear_everything,
251
+ outputs=[
252
+ lari_map,
253
+ valid_mask,
254
+ min_layer_id,
255
+ max_layer_id,
256
+ image_input,
257
+ depth_output,
258
+ outmodel,
259
+ ],
260
+ )
261
+
262
+ slider_layer_id.change(
263
+ fn=change_layer,
264
+ inputs=[slider_layer_id, lari_map, valid_mask, min_layer_id, max_layer_id],
265
+ outputs=depth_output,
266
+ )
267
+
268
+ gradio.Examples(examples=["assets/cole_hardware.png",
269
+ "assets/3m_tape.png",
270
+ "assets/horse.png",
271
+ "assets/rhino.png",
272
+ "assets/alphabet.png",
273
+ "assets/martin_wedge.png",
274
+ "assets/d_rose.png",
275
+ "assets/ace.png",
276
+ "assets/bifidus.png",
277
+ "assets/fem.png",
278
+ ],
279
+ inputs=image_input)
280
+
281
+
282
+ demo.launch(share=False)
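Note on the mask path in `model_forward` above: `pred_dict["seg_prob"]` is squeezed to an (L+1, H, W) probability map and converted by `prob_to_mask` (defined in `src/utils/vis.py`, whose diff does not appear in this section) into the (H, W, L, 1) validity mask the rest of the function expects. The snippet below is only a plausible sketch of that conversion under the 'ray_stop' reading described in `dinoseg_model.py` — a layer counts as valid if it lies in front of the arg-max stop index — and is not the repository's implementation.

```python
import torch

def prob_to_mask_sketch(seg_prob: torch.Tensor) -> torch.Tensor:
    # seg_prob: (L+1, H, W) after squeezing the batch dimension, as in model_forward.
    stop_idx = seg_prob.argmax(dim=0)                              # (H, W): stop layer per pixel (assumption)
    num_layers = seg_prob.shape[0] - 1
    layer_ids = torch.arange(num_layers, device=seg_prob.device)   # (L,)
    valid = layer_ids.view(1, 1, -1) < stop_idx.unsqueeze(-1)      # (H, W, L): layers before the stop index
    return valid.unsqueeze(-1)                                     # (H, W, L, 1)

seg_prob = torch.randn(5, 8, 8).softmax(dim=0)   # toy input: 4 layers + 1 "stop" class
print(prob_to_mask_sketch(seg_prob).shape)       # torch.Size([8, 8, 4, 1])
```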
demo.py ADDED
@@ -0,0 +1,136 @@
1
+ import argparse
2
+ import os
3
+ import torch
4
+ import torch.backends.cudnn as cudnn
5
+ from PIL import Image
6
+ from src.utils.vis import prob_to_mask
7
+ from huggingface_hub import hf_hub_download
8
+ from tools import load_model, process_image, post_process_output, get_masked_depth, get_point_cloud, removebg_crop
9
+
10
+ parser = argparse.ArgumentParser("Arguments for deploying a LaRI Demo")
11
+ parser.add_argument(
12
+ "--image_path",
13
+ type=str,
14
+ default="assets/cole_hardware.png",
15
+ help="input image name",
16
+ )
17
+
18
+ parser.add_argument(
19
+ "--output_path",
20
+ type=str,
21
+ default="./results",
22
+ help="path to save the image",
23
+ )
24
+
25
+ parser.add_argument(
26
+ "--model_info_pm",
27
+ type=str,
28
+ default="LaRIModel(use_pretrained = 'moge_full', num_output_layer = 5, head_type = 'point')",
29
+ help="Network parameters to load the model",
30
+ )
31
+
32
+ parser.add_argument(
33
+ "--model_info_mask",
34
+ type=str,
35
+ default="DinoSegModel(use_pretrained = 'dinov2', dim_proj = 256, pretrained_path = '', num_output_layer = 4, output_type = 'ray_stop')",
36
+ help="Network parameters to load the model",
37
+ )
38
+
39
+ parser.add_argument(
40
+ "--ckpt_path_pm",
41
+ type=str,
42
+ default="lari_obj_16k_pointmap.pth",
43
+ help="Path to pre-trained weights",
44
+ )
45
+
46
+ parser.add_argument(
47
+ "--ckpt_path_mask",
48
+ type=str,
49
+ default="lari_obj_16k_seg.pth",
50
+ help="Path to pre-trained weights",
51
+ )
52
+
53
+ parser.add_argument(
54
+ "--resolution", type=int, default=512, help="Default model resolution"
55
+ )
56
+
57
+ parser.add_argument(
58
+ "--is_remove_background", action="store_true", help="Automatically remove the background."
59
+ )
60
+
61
+ args = parser.parse_args()
62
+
63
+
64
+
65
+
66
+
67
+
68
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
69
+ cudnn.benchmark = True
70
+
71
+ # === Load the model
72
+
73
+ model_path_pm = hf_hub_download(repo_id="ruili3/LaRI", filename=args.ckpt_path_pm, repo_type="model")
74
+ model_path_mask = hf_hub_download(repo_id="ruili3/LaRI", filename=args.ckpt_path_mask, repo_type="model")
75
+ # Load the model with pretrained weights.
76
+ model_pm = load_model(args.model_info_pm, model_path_pm, device)
77
+ model_mask = (
78
+ load_model(args.model_info_mask, model_path_mask, device)
79
+ if args.model_info_mask is not None
80
+ else None
81
+ )
82
+
83
+ # === Image pre-processing
84
+ pil_input = Image.open(args.image_path)
85
+ if args.is_remove_background:
86
+ pil_input = removebg_crop(pil_input) # remove background
87
+ input_tensor, ori_img_tensor, crop_coords, original_size = process_image(
88
+ pil_input, resolution=args.resolution) # crop & resize to fit the model input size
89
+ input_tensor = input_tensor.to(device)
90
+
91
+
92
+ # === Run inference
93
+ with torch.no_grad():
94
+ # lari map
95
+ pred_dict = model_pm(input_tensor)
96
+ lari_map = -pred_dict["pts3d"].squeeze(
97
+ 0
98
+ )
99
+ # mask
100
+ if model_mask:
101
+ pred_dict = model_mask(input_tensor)
102
+ assert "seg_prob" in pred_dict
103
+ valid_mask = prob_to_mask(pred_dict["seg_prob"].squeeze(0)) # H W L 1
104
+ else:
105
+ h, w, l, _ = lari_map.shape
106
+ valid_mask = torch.new_ones((h, w, l, 1), device=lari_map.device)
107
+
108
+ # === crop & resize back to the original resolution
109
+ if original_size[0] != args.resolution or original_size[1] != args.resolution:
110
+ lari_map = post_process_output(lari_map, crop_coords, original_size) # H W L 3
111
+ valid_mask = post_process_output(
112
+ valid_mask.float(), crop_coords, original_size
113
+ ).bool() # H W L 1
114
+
115
+ max_n_layer = min(valid_mask.shape[-2], lari_map.shape[-2])
116
+ valid_mask = valid_mask[:, :, :max_n_layer, :]
117
+ lari_map = lari_map[:, :, :max_n_layer, :]
118
+
119
+
120
+ # === save output
121
+ os.makedirs(args.output_path, exist_ok=True)
122
+
123
+ for layer_id in range(max_n_layer):
124
+ depth_pil = get_masked_depth(
125
+ lari_map=lari_map, valid_mask=valid_mask, layer_id=layer_id
126
+ )
127
+ depth_pil.save(os.path.join(args.output_path, f"layered_depth_{layer_id}.jpg"))
128
+
129
+
130
+ # point cloud
131
+ glb_path, ply_path = get_point_cloud(
132
+ lari_map, ori_img_tensor, valid_mask, first_layer_color="pseudo",
133
+ target_folder=args.output_path
134
+ )
135
+
136
+ print("All results saved to `{}`.".format(args.output_path))
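`get_masked_depth` above comes from `tools.py`, which is not among the diffs shown in this view. As a rough, self-contained illustration of what a masked per-layer depth image amounts to — the z-channel of the (negated) LaRI point map at one layer, restricted to that layer's validity mask — here is a sketch; the min-max normalization is an assumption made purely for display, not the repository's exact behavior.

```python
import torch

def layered_depth_sketch(lari_map: torch.Tensor, valid_mask: torch.Tensor, layer_id: int) -> torch.Tensor:
    depth = lari_map[..., layer_id, 2]                 # z-channel of the chosen layer, (H, W)
    mask = valid_mask[..., layer_id, 0].bool()         # validity of that layer, (H, W)
    out = torch.zeros_like(depth)
    if mask.any():
        d = depth[mask]
        out[mask] = (d - d.min()) / (d.max() - d.min() + 1e-8)  # min-max normalize for display (assumption)
    return out

lari_map = torch.randn(16, 16, 4, 3)     # (H, W, L, 3) toy LaRI point map
valid_mask = torch.ones(16, 16, 4, 1)    # (H, W, L, 1) all-valid toy mask
print(layered_depth_sketch(lari_map, valid_mask, layer_id=0).shape)  # torch.Size([16, 16])
```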
requirements.txt ADDED
@@ -0,0 +1,19 @@
1
+ gradio==5.23.3
2
+ huggingface_hub==0.30.1
3
+ imageio==2.37.0
4
+ matplotlib==3.10.1
5
+ moderngl==5.12.0
6
+ omegaconf==2.3.0
7
+ opencv_python==4.11.0.86
8
+ opencv_python_headless==4.11.0.86
9
+ Pillow==11.1.0
10
+ piqp==0.5.0
11
+ plyfile==1.1
12
+ rembg==2.0.65
13
+ scipy==1.15.2
14
+ torchvision==0.21.0
15
+ trimesh==4.6.4
16
+ xformers==0.0.29.post3
17
+ numpy==1.26.4
18
+ torch==2.6.0
19
+ # opencv-python is already pinned above as opencv_python==4.11.0.86; the duplicate, conflicting pin was removed
src/lari/model/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .lari_model import LaRIModel
2
+ from .dinoseg_model import DinoSegModel
src/lari/model/blocks.py ADDED
@@ -0,0 +1,209 @@
1
+ from typing import *
2
+ import torch.nn as nn
3
+
4
+ class ResidualConvBlock(nn.Module):
5
+ def __init__(self, in_channels: int, out_channels: int = None, hidden_channels: int = None, padding_mode: str = 'replicate', activation: Literal['relu', 'leaky_relu', 'silu', 'elu'] = 'relu', norm: Literal['group_norm', 'layer_norm'] = 'group_norm'):
6
+ super(ResidualConvBlock, self).__init__()
7
+ if out_channels is None:
8
+ out_channels = in_channels
9
+ if hidden_channels is None:
10
+ hidden_channels = in_channels
11
+
12
+ if activation =='relu':
13
+ activation_cls = lambda: nn.ReLU(inplace=True)
14
+ elif activation == 'leaky_relu':
15
+ activation_cls = lambda: nn.LeakyReLU(negative_slope=0.2, inplace=True)
16
+ elif activation =='silu':
17
+ activation_cls = lambda: nn.SiLU(inplace=True)
18
+ elif activation == 'elu':
19
+ activation_cls = lambda: nn.ELU(inplace=True)
20
+ else:
21
+ raise ValueError(f'Unsupported activation function: {activation}')
22
+
23
+ self.layers = nn.Sequential(
24
+ nn.GroupNorm(1, in_channels),
25
+ activation_cls(),
26
+ nn.Conv2d(in_channels, hidden_channels, kernel_size=3, padding=1, padding_mode=padding_mode),
27
+ nn.GroupNorm(hidden_channels // 32 if norm == 'group_norm' else 1, hidden_channels),
28
+ activation_cls(),
29
+ nn.Conv2d(hidden_channels, out_channels, kernel_size=3, padding=1, padding_mode=padding_mode)
30
+ )
31
+
32
+ self.skip_connection = nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0) if in_channels != out_channels else nn.Identity()
33
+
34
+ def forward(self, x):
35
+ skip = self.skip_connection(x)
36
+ x = self.layers(x)
37
+ x = x + skip
38
+ return x
39
+
40
+
41
+
42
+
43
+ def make_upsampler(in_channels: int, out_channels: int):
44
+ upsampler = nn.Sequential(
45
+ nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
46
+ nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, padding_mode='replicate')
47
+ )
48
+ upsampler[0].weight.data[:] = upsampler[0].weight.data[:, :, :1, :1]
49
+ return upsampler
50
+
51
+ def make_output_block(dim_in: int, dim_out: int, dim_times_res_block_hidden: int, last_res_blocks: int, last_conv_channels: int, last_conv_size: int, res_block_norm: Literal['group_norm', 'layer_norm']):
52
+ return nn.Sequential(
53
+ nn.Conv2d(dim_in, last_conv_channels, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
54
+ *(ResidualConvBlock(last_conv_channels, last_conv_channels, dim_times_res_block_hidden * last_conv_channels, activation='relu', norm=res_block_norm) for _ in range(last_res_blocks)),
55
+ nn.ReLU(inplace=True),
56
+ nn.Conv2d(last_conv_channels, dim_out, kernel_size=last_conv_size, stride=1, padding=last_conv_size // 2, padding_mode='replicate'),
57
+ )
58
+
59
+
60
+
61
+ # ---- the following are from Depth Anything ----
62
+ import torch.nn as nn
63
+
64
+
65
+ def _make_scratch(in_shape, out_shape, groups=1, expand=False):
66
+ scratch = nn.Module()
67
+
68
+ out_shape1 = out_shape
69
+ out_shape2 = out_shape
70
+ out_shape3 = out_shape
71
+ if len(in_shape) >= 4:
72
+ out_shape4 = out_shape
73
+
74
+ if expand:
75
+ out_shape1 = out_shape
76
+ out_shape2 = out_shape * 2
77
+ out_shape3 = out_shape * 4
78
+ if len(in_shape) >= 4:
79
+ out_shape4 = out_shape * 8
80
+
81
+ scratch.layer1_rn = nn.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
82
+ scratch.layer2_rn = nn.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
83
+ scratch.layer3_rn = nn.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
84
+ if len(in_shape) >= 4:
85
+ scratch.layer4_rn = nn.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
86
+
87
+ return scratch
88
+
89
+
90
+ class ResidualConvUnit(nn.Module):
91
+ """Residual convolution module.
92
+ """
93
+
94
+ def __init__(self, features, activation, bn):
95
+ """Init.
96
+
97
+ Args:
98
+ features (int): number of features
99
+ """
100
+ super().__init__()
101
+
102
+ self.bn = bn
103
+
104
+ self.groups=1
105
+
106
+ self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)
107
+
108
+ self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)
109
+
110
+ if self.bn == True:
111
+ self.bn1 = nn.BatchNorm2d(features)
112
+ self.bn2 = nn.BatchNorm2d(features)
113
+
114
+ self.activation = activation
115
+
116
+ self.skip_add = nn.quantized.FloatFunctional()
117
+
118
+ def forward(self, x):
119
+ """Forward pass.
120
+
121
+ Args:
122
+ x (tensor): input
123
+
124
+ Returns:
125
+ tensor: output
126
+ """
127
+
128
+ out = self.activation(x)
129
+ out = self.conv1(out)
130
+ if self.bn == True:
131
+ out = self.bn1(out)
132
+
133
+ out = self.activation(out)
134
+ out = self.conv2(out)
135
+ if self.bn == True:
136
+ out = self.bn2(out)
137
+
138
+ if self.groups > 1:
139
+ out = self.conv_merge(out)
140
+
141
+ return self.skip_add.add(out, x)
142
+
143
+
144
+ class FeatureFusionBlock(nn.Module):
145
+ """Feature fusion block.
146
+ """
147
+
148
+ def __init__(
149
+ self,
150
+ features,
151
+ activation,
152
+ deconv=False,
153
+ bn=False,
154
+ expand=False,
155
+ align_corners=True,
156
+ size=None
157
+ ):
158
+ """Init.
159
+
160
+ Args:
161
+ features (int): number of features
162
+ """
163
+ super(FeatureFusionBlock, self).__init__()
164
+
165
+ self.deconv = deconv
166
+ self.align_corners = align_corners
167
+
168
+ self.groups=1
169
+
170
+ self.expand = expand
171
+ out_features = features
172
+ if self.expand == True:
173
+ out_features = features // 2
174
+
175
+ self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
176
+
177
+ self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
178
+ self.resConfUnit2 = ResidualConvUnit(features, activation, bn)
179
+
180
+ self.skip_add = nn.quantized.FloatFunctional()
181
+
182
+ self.size=size
183
+
184
+ def forward(self, *xs, size=None):
185
+ """Forward pass.
186
+
187
+ Returns:
188
+ tensor: output
189
+ """
190
+ output = xs[0]
191
+
192
+ if len(xs) == 2:
193
+ res = self.resConfUnit1(xs[1])
194
+ output = self.skip_add.add(output, res)
195
+
196
+ output = self.resConfUnit2(output)
197
+
198
+ if (size is None) and (self.size is None):
199
+ modifier = {"scale_factor": 2}
200
+ elif size is None:
201
+ modifier = {"size": self.size}
202
+ else:
203
+ modifier = {"size": size}
204
+
205
+ output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners)
206
+
207
+ output = self.out_conv(output)
208
+
209
+ return output
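A quick shape check for the Depth-Anything-style `FeatureFusionBlock` defined above: with `expand=False` it fuses an optional skip feature into the running decoder feature, upsamples by a factor of 2 (bilinear) by default, and keeps the channel count. This assumes the repository root is on `PYTHONPATH` so `src.lari.model.blocks` is importable.

```python
import torch
import torch.nn as nn
from src.lari.model.blocks import FeatureFusionBlock

fuse = FeatureFusionBlock(features=64, activation=nn.ReLU(False), bn=False)
coarse = torch.randn(1, 64, 16, 16)   # decoder feature at the current scale
skip = torch.randn(1, 64, 16, 16)     # encoder feature to be fused in
out = fuse(coarse, skip)
print(out.shape)  # torch.Size([1, 64, 32, 32]) -- default scale_factor=2 upsampling, channels preserved
```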
src/lari/model/dinoseg_model.py ADDED
@@ -0,0 +1,153 @@
1
+ from typing import *
2
+ from numbers import Number
3
+ from functools import partial
4
+ from pathlib import Path
5
+ import importlib
6
+ import warnings
7
+ import json
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import torch.utils
13
+ import torch.utils.checkpoint
14
+ import torch.version
15
+ from huggingface_hub import hf_hub_download
16
+
17
+
18
+ from src.lari.model.utils import wrap_dinov2_attention_with_sdpa, wrap_module_with_gradient_checkpointing, unwrap_module_with_gradient_checkpointing
19
+ from src.lari.model.dpt_seg_head import DPTSegHead
20
+
21
+
22
+
23
+ class DinoSegModel(nn.Module):
24
+
25
+ def __init__(self,
26
+ encoder: str = 'dinov2_vitl14',
27
+ intermediate_layers: Union[int, List[int]] = 4,
28
+ dim_proj: int = 512,
29
+ use_pretrained: Literal["dinov2", "moge_full", "moge_backbone", None] = None,
30
+ pretrained_path: str = None,
31
+ num_output_layer: int = None,
32
+ output_type: str = "ray_stop", # "seg_sep"
33
+ **deprecated_kwargs
34
+ ):
35
+ super(DinoSegModel, self).__init__()
36
+ if deprecated_kwargs:
37
+ warnings.warn(f"The following deprecated/invalid arguments are ignored: {deprecated_kwargs}")
38
+
39
+ self.encoder = encoder
40
+ self.intermediate_layers = intermediate_layers
41
+ self.use_pretrained = use_pretrained
42
+ self.pretrained_path = pretrained_path
43
+ self.num_output_layer = num_output_layer
44
+ self.output_type = output_type
45
+ assert self.output_type in ["seg_sep", "ray_stop"]
46
+
47
+ hub_loader = getattr(importlib.import_module(".dinov2.hub.backbones", __package__), encoder)
48
+
49
+ self.backbone = hub_loader(pretrained=True if self.use_pretrained == "dinov2" else False)
50
+ dim_feature = self.backbone.blocks[0].attn.qkv.in_features
51
+
52
+
53
+
54
+
55
+ self.head = DPTSegHead(in_channels=dim_feature,
56
+ features=dim_proj,
57
+ use_bn=True,
58
+ out_channels=[256, 512, 1024, 1024],
59
+ use_clstoken=False,
60
+ num_classes = num_output_layer,
61
+ output_type = self.output_type
62
+ )
63
+
64
+
65
+ if torch.__version__ >= '2.0':
66
+ self.enable_pytorch_native_sdpa()
67
+
68
+ self._load_pretrained()
69
+
70
+
71
+ def _load_pretrained(self):
72
+ '''
73
+ Load data from MoGe model
74
+ '''
75
+ return
76
+
77
+
78
+
79
+
80
+ @classmethod
81
+ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, Path, IO[bytes]], model_kwargs: Optional[Dict[str, Any]] = None, **hf_kwargs) -> 'DinoSegModel':
82
+ """
83
+ Load a model from a checkpoint file.
84
+
85
+ ### Parameters:
86
+ - `pretrained_model_name_or_path`: path to the checkpoint file or repo id.
87
+ - `model_kwargs`: additional keyword arguments to override the parameters in the checkpoint.
88
+ - `hf_kwargs`: additional keyword arguments to pass to the `hf_hub_download` function. Ignored if `pretrained_model_name_or_path` is a local path.
89
+
90
+ ### Returns:
91
+ - A new instance of `DinoSegModel` with the parameters loaded from the checkpoint.
92
+ """
93
+ if Path(pretrained_model_name_or_path).exists():
94
+ checkpoint = torch.load(pretrained_model_name_or_path, map_location='cpu', weights_only=True)
95
+ else:
96
+ cached_checkpoint_path = hf_hub_download(
97
+ repo_id=pretrained_model_name_or_path,
98
+ repo_type="model",
99
+ filename="model.pt",
100
+ **hf_kwargs
101
+ )
102
+ checkpoint = torch.load(cached_checkpoint_path, map_location='cpu', weights_only=True)
103
+ model_config = checkpoint['model_config']
104
+ if model_kwargs is not None:
105
+ model_config.update(model_kwargs)
106
+ model = cls(**model_config)
107
+ model.load_state_dict(checkpoint['model'])
108
+ return model
109
+
110
+ @staticmethod
111
+ def cache_pretrained_backbone(encoder: str, pretrained: bool):
112
+ _ = torch.hub.load('facebookresearch/dinov2', encoder, pretrained=pretrained)
113
+
114
+ def load_pretrained_backbone(self):
115
+ "Load the backbone with pretrained dinov2 weights from torch hub"
116
+ state_dict = torch.hub.load('facebookresearch/dinov2', self.encoder, pretrained=True).state_dict()
117
+ self.backbone.load_state_dict(state_dict)
118
+
119
+ def enable_backbone_gradient_checkpointing(self):
120
+ for i in range(len(self.backbone.blocks)):
121
+ self.backbone.blocks[i] = wrap_module_with_gradient_checkpointing(self.backbone.blocks[i])
122
+
123
+ def enable_pytorch_native_sdpa(self):
124
+ for i in range(len(self.backbone.blocks)):
125
+ self.backbone.blocks[i].attn = wrap_dinov2_attention_with_sdpa(self.backbone.blocks[i].attn)
126
+
127
+
128
+
129
+ def forward(self, image: torch.Tensor, mixed_precision: bool = False) -> Dict[str, torch.Tensor]:
130
+ raw_img_h, raw_img_w = image.shape[-2:]
131
+ patch_h, patch_w = raw_img_h // 14, raw_img_w // 14
132
+ # Apply image transformation for DINOv2
133
+ image_14 = F.interpolate(image, (patch_h * 14, patch_w * 14), mode="bilinear", align_corners=False, antialias=True)
134
+
135
+ # Get intermediate layers from the backbone
136
+ with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=mixed_precision):
137
+ features = self.backbone.get_intermediate_layers(image_14, self.intermediate_layers, return_class_token=True)
138
+
139
+ # Predict points and mask (mask scores)
140
+ mask = self.head(features, patch_h, patch_w)
141
+
142
+ # b c h w
143
+ mask = F.interpolate(mask, (raw_img_h, raw_img_w), mode="bilinear", align_corners=False)
144
+
145
+ out_dict = {}
146
+
147
+ if self.output_type == "seg_sep":
148
+ # mask = torch.nn.functional.sigmoid(mask) # for binary segmentation
149
+ out_dict["mask"] = mask.permute(0, 2, 3, 1).unsqueeze(-1) # B H W L 1
150
+ elif self.output_type == "ray_stop":
151
+ out_dict["seg_prob"] = mask # B L+1 H W
152
+
153
+ return out_dict
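A hedged usage sketch for `DinoSegModel.from_pretrained` above: it accepts either a local checkpoint path or a Hugging Face repo id, and expects a checkpoint dict with `model_config` and `model` keys. The path and repo id below are placeholders, not assets shipped with this commit.

```python
from src.lari.model import DinoSegModel

# Local checkpoint (placeholder path): the file must contain {"model_config": ..., "model": ...}.
model = DinoSegModel.from_pretrained("checkpoints/lari_seg.pt")

# Hugging Face Hub repo id (placeholder): from_pretrained downloads "model.pt" from that repo.
model = DinoSegModel.from_pretrained(
    "your-namespace/your-lari-seg-checkpoint",
    model_kwargs={"output_type": "ray_stop"},  # overrides entries stored in model_config
)
```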
src/lari/model/dinov2/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ __version__ = "0.0.1"
src/lari/model/dinov2/hub/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
src/lari/model/dinov2/hub/backbones.py ADDED
@@ -0,0 +1,156 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from enum import Enum
7
+ from typing import Union
8
+
9
+ import torch
10
+
11
+ from .utils import _DINOV2_BASE_URL, _make_dinov2_model_name
12
+
13
+
14
+ class Weights(Enum):
15
+ LVD142M = "LVD142M"
16
+
17
+
18
+ def _make_dinov2_model(
19
+ *,
20
+ arch_name: str = "vit_large",
21
+ img_size: int = 518,
22
+ patch_size: int = 14,
23
+ init_values: float = 1.0,
24
+ ffn_layer: str = "mlp",
25
+ block_chunks: int = 0,
26
+ num_register_tokens: int = 0,
27
+ interpolate_antialias: bool = False,
28
+ interpolate_offset: float = 0.1,
29
+ pretrained: bool = True,
30
+ weights: Union[Weights, str] = Weights.LVD142M,
31
+ **kwargs,
32
+ ):
33
+ from ..models import vision_transformer as vits
34
+
35
+ if isinstance(weights, str):
36
+ try:
37
+ weights = Weights[weights]
38
+ except KeyError:
39
+ raise AssertionError(f"Unsupported weights: {weights}")
40
+
41
+ model_base_name = _make_dinov2_model_name(arch_name, patch_size)
42
+ vit_kwargs = dict(
43
+ img_size=img_size,
44
+ patch_size=patch_size,
45
+ init_values=init_values,
46
+ ffn_layer=ffn_layer,
47
+ block_chunks=block_chunks,
48
+ num_register_tokens=num_register_tokens,
49
+ interpolate_antialias=interpolate_antialias,
50
+ interpolate_offset=interpolate_offset,
51
+ )
52
+ vit_kwargs.update(**kwargs)
53
+ model = vits.__dict__[arch_name](**vit_kwargs)
54
+
55
+ if pretrained:
56
+ model_full_name = _make_dinov2_model_name(arch_name, patch_size, num_register_tokens)
57
+ url = _DINOV2_BASE_URL + f"/{model_base_name}/{model_full_name}_pretrain.pth"
58
+ state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu")
59
+ model.load_state_dict(state_dict, strict=True)
60
+
61
+ return model
62
+
63
+
64
+ def dinov2_vits14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
65
+ """
66
+ DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset.
67
+ """
68
+ return _make_dinov2_model(arch_name="vit_small", pretrained=pretrained, weights=weights, **kwargs)
69
+
70
+
71
+ def dinov2_vitb14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
72
+ """
73
+ DINOv2 ViT-B/14 model (optionally) pretrained on the LVD-142M dataset.
74
+ """
75
+ return _make_dinov2_model(arch_name="vit_base", pretrained=pretrained, weights=weights, **kwargs)
76
+
77
+
78
+ def dinov2_vitl14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
79
+ """
80
+ DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset.
81
+ """
82
+ return _make_dinov2_model(arch_name="vit_large", pretrained=pretrained, weights=weights, **kwargs)
83
+
84
+
85
+ def dinov2_vitg14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
86
+ """
87
+ DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset.
88
+ """
89
+ return _make_dinov2_model(
90
+ arch_name="vit_giant2",
91
+ ffn_layer="swiglufused",
92
+ weights=weights,
93
+ pretrained=pretrained,
94
+ **kwargs,
95
+ )
96
+
97
+
98
+ def dinov2_vits14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
99
+ """
100
+ DINOv2 ViT-S/14 model with registers (optionally) pretrained on the LVD-142M dataset.
101
+ """
102
+ return _make_dinov2_model(
103
+ arch_name="vit_small",
104
+ pretrained=pretrained,
105
+ weights=weights,
106
+ num_register_tokens=4,
107
+ interpolate_antialias=True,
108
+ interpolate_offset=0.0,
109
+ **kwargs,
110
+ )
111
+
112
+
113
+ def dinov2_vitb14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
114
+ """
115
+ DINOv2 ViT-B/14 model with registers (optionally) pretrained on the LVD-142M dataset.
116
+ """
117
+ return _make_dinov2_model(
118
+ arch_name="vit_base",
119
+ pretrained=pretrained,
120
+ weights=weights,
121
+ num_register_tokens=4,
122
+ interpolate_antialias=True,
123
+ interpolate_offset=0.0,
124
+ **kwargs,
125
+ )
126
+
127
+
128
+ def dinov2_vitl14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
129
+ """
130
+ DINOv2 ViT-L/14 model with registers (optionally) pretrained on the LVD-142M dataset.
131
+ """
132
+ return _make_dinov2_model(
133
+ arch_name="vit_large",
134
+ pretrained=pretrained,
135
+ weights=weights,
136
+ num_register_tokens=4,
137
+ interpolate_antialias=True,
138
+ interpolate_offset=0.0,
139
+ **kwargs,
140
+ )
141
+
142
+
143
+ def dinov2_vitg14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
144
+ """
145
+ DINOv2 ViT-g/14 model with registers (optionally) pretrained on the LVD-142M dataset.
146
+ """
147
+ return _make_dinov2_model(
148
+ arch_name="vit_giant2",
149
+ ffn_layer="swiglufused",
150
+ weights=weights,
151
+ pretrained=pretrained,
152
+ num_register_tokens=4,
153
+ interpolate_antialias=True,
154
+ interpolate_offset=0.0,
155
+ **kwargs,
156
+ )
src/lari/model/dinov2/hub/utils.py ADDED
@@ -0,0 +1,39 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import itertools
7
+ import math
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+
13
+
14
+ _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2"
15
+
16
+
17
+ def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str:
18
+ compact_arch_name = arch_name.replace("_", "")[:4]
19
+ registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else ""
20
+ return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}"
21
+
22
+
23
+ class CenterPadding(nn.Module):
24
+ def __init__(self, multiple):
25
+ super().__init__()
26
+ self.multiple = multiple
27
+
28
+ def _get_pad(self, size):
29
+ new_size = math.ceil(size / self.multiple) * self.multiple
30
+ pad_size = new_size - size
31
+ pad_size_left = pad_size // 2
32
+ pad_size_right = pad_size - pad_size_left
33
+ return pad_size_left, pad_size_right
34
+
35
+ @torch.inference_mode()
36
+ def forward(self, x):
37
+ pads = list(itertools.chain.from_iterable(self._get_pad(m) for m in x.shape[:1:-1]))
38
+ output = F.pad(x, pads)
39
+ return output
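`CenterPadding` above pads the last two (spatial) dimensions up to the next multiple of `multiple` (14 for these ViT backbones), splitting the padding as evenly as possible on both sides. A minimal check, assuming the repository root is importable:

```python
import torch
from src.lari.model.dinov2.hub.utils import CenterPadding

pad = CenterPadding(multiple=14)
x = torch.randn(1, 3, 100, 130)
print(pad(x).shape)  # torch.Size([1, 3, 112, 140]) -- 100 -> 112 (pad 6+6) and 130 -> 140 (pad 5+5)
```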
src/lari/model/dinov2/layers/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from .dino_head import DINOHead
7
+ from .mlp import Mlp
8
+ from .patch_embed import PatchEmbed
9
+ from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
10
+ from .block import NestedTensorBlock
11
+ from .attention import MemEffAttention
src/lari/model/dinov2/layers/attention.py ADDED
@@ -0,0 +1,89 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # References:
7
+ # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
8
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
9
+
10
+ import logging
11
+ import os
12
+ import warnings
13
+
14
+ from torch import Tensor
15
+ from torch import nn
16
+
17
+
18
+ logger = logging.getLogger("dinov2")
19
+
20
+
21
+ XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None
22
+ try:
23
+ if XFORMERS_ENABLED:
24
+ from xformers.ops import memory_efficient_attention, unbind
25
+
26
+ XFORMERS_AVAILABLE = True
27
+ # warnings.warn("xFormers is available (Attention)")
28
+ else:
29
+ # warnings.warn("xFormers is disabled (Attention)")
30
+ raise ImportError
31
+ except ImportError:
32
+ XFORMERS_AVAILABLE = False
33
+ # warnings.warn("xFormers is not available (Attention)")
34
+
35
+
36
+ class Attention(nn.Module):
37
+ def __init__(
38
+ self,
39
+ dim: int,
40
+ num_heads: int = 8,
41
+ qkv_bias: bool = False,
42
+ proj_bias: bool = True,
43
+ attn_drop: float = 0.0,
44
+ proj_drop: float = 0.0,
45
+ ) -> None:
46
+ super().__init__()
47
+ self.num_heads = num_heads
48
+ head_dim = dim // num_heads
49
+ self.scale = head_dim**-0.5
50
+
51
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
52
+ self.attn_drop = nn.Dropout(attn_drop)
53
+ self.proj = nn.Linear(dim, dim, bias=proj_bias)
54
+ self.proj_drop = nn.Dropout(proj_drop)
55
+
56
+ def forward(self, x: Tensor, attn_bias=None) -> Tensor:
57
+ B, N, C = x.shape
58
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
59
+
60
+ q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
61
+ attn = q @ k.transpose(-2, -1)
62
+
63
+ attn = attn.softmax(dim=-1)
64
+ attn = self.attn_drop(attn)
65
+
66
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
67
+ x = self.proj(x)
68
+ x = self.proj_drop(x)
69
+ return x
70
+
71
+
72
+ class MemEffAttention(Attention):
73
+ def forward(self, x: Tensor, attn_bias=None) -> Tensor:
74
+ if not XFORMERS_AVAILABLE:
75
+ if attn_bias is not None:
76
+ raise AssertionError("xFormers is required for using nested tensors")
77
+ return super().forward(x)
78
+
79
+ B, N, C = x.shape
80
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
81
+
82
+ q, k, v = unbind(qkv, 2)
83
+
84
+ x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
85
+ x = x.reshape([B, N, C])
86
+
87
+ x = self.proj(x)
88
+ x = self.proj_drop(x)
89
+ return x
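The plain `Attention` module above is the fallback used when xFormers is unavailable; `MemEffAttention` swaps in `memory_efficient_attention` but keeps the same input/output contract. A minimal shape check, assuming the repository root is importable:

```python
import torch
from src.lari.model.dinov2.layers.attention import Attention

attn = Attention(dim=64, num_heads=8, qkv_bias=True)
tokens = torch.randn(2, 10, 64)  # (batch, tokens, channels)
print(attn(tokens).shape)        # torch.Size([2, 10, 64]) -- attention preserves the token shape
```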
src/lari/model/dinov2/layers/block.py ADDED
@@ -0,0 +1,259 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # References:
7
+ # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
8
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
9
+
10
+ import logging
11
+ import os
12
+ from typing import Callable, List, Any, Tuple, Dict
13
+ import warnings
14
+
15
+ import torch
16
+ from torch import nn, Tensor
17
+
18
+ from .attention import Attention, MemEffAttention
19
+ from .drop_path import DropPath
20
+ from .layer_scale import LayerScale
21
+ from .mlp import Mlp
22
+
23
+
24
+ logger = logging.getLogger("dinov2")
25
+
26
+
27
+ XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None
28
+ try:
29
+ if XFORMERS_ENABLED:
30
+ from xformers.ops import fmha, scaled_index_add, index_select_cat
31
+
32
+ XFORMERS_AVAILABLE = True
33
+ # warnings.warn("xFormers is available (Block)")
34
+ else:
35
+ # warnings.warn("xFormers is disabled (Block)")
36
+ raise ImportError
37
+ except ImportError:
38
+ XFORMERS_AVAILABLE = False
39
+ # warnings.warn("xFormers is not available (Block)")
40
+
41
+
42
+ class Block(nn.Module):
43
+ def __init__(
44
+ self,
45
+ dim: int,
46
+ num_heads: int,
47
+ mlp_ratio: float = 4.0,
48
+ qkv_bias: bool = False,
49
+ proj_bias: bool = True,
50
+ ffn_bias: bool = True,
51
+ drop: float = 0.0,
52
+ attn_drop: float = 0.0,
53
+ init_values=None,
54
+ drop_path: float = 0.0,
55
+ act_layer: Callable[..., nn.Module] = nn.GELU,
56
+ norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
57
+ attn_class: Callable[..., nn.Module] = Attention,
58
+ ffn_layer: Callable[..., nn.Module] = Mlp,
59
+ ) -> None:
60
+ super().__init__()
61
+ # print(f"biases: qkv: {qkv_bias}, proj: {proj_bias}, ffn: {ffn_bias}")
62
+ self.norm1 = norm_layer(dim)
63
+ self.attn = attn_class(
64
+ dim,
65
+ num_heads=num_heads,
66
+ qkv_bias=qkv_bias,
67
+ proj_bias=proj_bias,
68
+ attn_drop=attn_drop,
69
+ proj_drop=drop,
70
+ )
71
+ self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
72
+ self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
73
+
74
+ self.norm2 = norm_layer(dim)
75
+ mlp_hidden_dim = int(dim * mlp_ratio)
76
+ self.mlp = ffn_layer(
77
+ in_features=dim,
78
+ hidden_features=mlp_hidden_dim,
79
+ act_layer=act_layer,
80
+ drop=drop,
81
+ bias=ffn_bias,
82
+ )
83
+ self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
84
+ self.drop_path2 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
85
+
86
+ self.sample_drop_ratio = drop_path
87
+
88
+ def forward(self, x: Tensor) -> Tensor:
89
+ def attn_residual_func(x: Tensor) -> Tensor:
90
+ return self.ls1(self.attn(self.norm1(x)))
91
+
92
+ def ffn_residual_func(x: Tensor) -> Tensor:
93
+ return self.ls2(self.mlp(self.norm2(x)))
94
+
95
+ if self.training and self.sample_drop_ratio > 0.1:
96
+ # the overhead is compensated only for a drop path rate larger than 0.1
97
+ x = drop_add_residual_stochastic_depth(
98
+ x,
99
+ residual_func=attn_residual_func,
100
+ sample_drop_ratio=self.sample_drop_ratio,
101
+ )
102
+ x = drop_add_residual_stochastic_depth(
103
+ x,
104
+ residual_func=ffn_residual_func,
105
+ sample_drop_ratio=self.sample_drop_ratio,
106
+ )
107
+ elif self.training and self.sample_drop_ratio > 0.0:
108
+ x = x + self.drop_path1(attn_residual_func(x))
109
+ x = x + self.drop_path1(ffn_residual_func(x)) # FIXME: drop_path2
110
+ else:
111
+ x = x + attn_residual_func(x)
112
+ x = x + ffn_residual_func(x)
113
+ return x
114
+
115
+
116
+ def drop_add_residual_stochastic_depth(
117
+ x: Tensor,
118
+ residual_func: Callable[[Tensor], Tensor],
119
+ sample_drop_ratio: float = 0.0,
120
+ ) -> Tensor:
121
+ # 1) extract subset using permutation
122
+ b, n, d = x.shape
123
+ sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
124
+ brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
125
+ x_subset = x[brange]
126
+
127
+ # 2) apply residual_func to get residual
128
+ residual = residual_func(x_subset)
129
+
130
+ x_flat = x.flatten(1)
131
+ residual = residual.flatten(1)
132
+
133
+ residual_scale_factor = b / sample_subset_size
134
+
135
+ # 3) add the residual
136
+ x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
137
+ return x_plus_residual.view_as(x)
138
+
139
+
140
+ def get_branges_scales(x, sample_drop_ratio=0.0):
141
+ b, n, d = x.shape
142
+ sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
143
+ brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
144
+ residual_scale_factor = b / sample_subset_size
145
+ return brange, residual_scale_factor
146
+
147
+
148
+ def add_residual(x, brange, residual, residual_scale_factor, scaling_vector=None):
149
+ if scaling_vector is None:
150
+ x_flat = x.flatten(1)
151
+ residual = residual.flatten(1)
152
+ x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
153
+ else:
154
+ x_plus_residual = scaled_index_add(
155
+ x, brange, residual.to(dtype=x.dtype), scaling=scaling_vector, alpha=residual_scale_factor
156
+ )
157
+ return x_plus_residual
158
+
159
+
160
+ attn_bias_cache: Dict[Tuple, Any] = {}
161
+
162
+
163
+ def get_attn_bias_and_cat(x_list, branges=None):
164
+ """
165
+ this will perform the index select, cat the tensors, and provide the attn_bias from cache
166
+ """
167
+ batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list]
168
+ all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
169
+ if all_shapes not in attn_bias_cache.keys():
170
+ seqlens = []
171
+ for b, x in zip(batch_sizes, x_list):
172
+ for _ in range(b):
173
+ seqlens.append(x.shape[1])
174
+ attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
175
+ attn_bias._batch_sizes = batch_sizes
176
+ attn_bias_cache[all_shapes] = attn_bias
177
+
178
+ if branges is not None:
179
+ cat_tensors = index_select_cat([x.flatten(1) for x in x_list], branges).view(1, -1, x_list[0].shape[-1])
180
+ else:
181
+ tensors_bs1 = tuple(x.reshape([1, -1, *x.shape[2:]]) for x in x_list)
182
+ cat_tensors = torch.cat(tensors_bs1, dim=1)
183
+
184
+ return attn_bias_cache[all_shapes], cat_tensors
185
+
186
+
187
+ def drop_add_residual_stochastic_depth_list(
188
+ x_list: List[Tensor],
189
+ residual_func: Callable[[Tensor, Any], Tensor],
190
+ sample_drop_ratio: float = 0.0,
191
+ scaling_vector=None,
192
+ ) -> Tensor:
193
+ # 1) generate random set of indices for dropping samples in the batch
194
+ branges_scales = [get_branges_scales(x, sample_drop_ratio=sample_drop_ratio) for x in x_list]
195
+ branges = [s[0] for s in branges_scales]
196
+ residual_scale_factors = [s[1] for s in branges_scales]
197
+
198
+ # 2) get attention bias and index+concat the tensors
199
+ attn_bias, x_cat = get_attn_bias_and_cat(x_list, branges)
200
+
201
+ # 3) apply residual_func to get residual, and split the result
202
+ residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias)) # type: ignore
203
+
204
+ outputs = []
205
+ for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors):
206
+ outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x))
207
+ return outputs
208
+
209
+
210
+ class NestedTensorBlock(Block):
211
+ def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]:
212
+ """
213
+ x_list contains a list of tensors to nest together and run
214
+ """
215
+ assert isinstance(self.attn, MemEffAttention)
216
+
217
+ if self.training and self.sample_drop_ratio > 0.0:
218
+
219
+ def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
220
+ return self.attn(self.norm1(x), attn_bias=attn_bias)
221
+
222
+ def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
223
+ return self.mlp(self.norm2(x))
224
+
225
+ x_list = drop_add_residual_stochastic_depth_list(
226
+ x_list,
227
+ residual_func=attn_residual_func,
228
+ sample_drop_ratio=self.sample_drop_ratio,
229
+ scaling_vector=self.ls1.gamma if isinstance(self.ls1, LayerScale) else None,
230
+ )
231
+ x_list = drop_add_residual_stochastic_depth_list(
232
+ x_list,
233
+ residual_func=ffn_residual_func,
234
+ sample_drop_ratio=self.sample_drop_ratio,
235
+ scaling_vector=self.ls2.gamma if isinstance(self.ls1, LayerScale) else None,
236
+ )
237
+ return x_list
238
+ else:
239
+
240
+ def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
241
+ return self.ls1(self.attn(self.norm1(x), attn_bias=attn_bias))
242
+
243
+ def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
244
+ return self.ls2(self.mlp(self.norm2(x)))
245
+
246
+ attn_bias, x = get_attn_bias_and_cat(x_list)
247
+ x = x + attn_residual_func(x, attn_bias=attn_bias)
248
+ x = x + ffn_residual_func(x)
249
+ return attn_bias.split(x)
250
+
251
+ def forward(self, x_or_x_list):
252
+ if isinstance(x_or_x_list, Tensor):
253
+ return super().forward(x_or_x_list)
254
+ elif isinstance(x_or_x_list, list):
255
+ if not XFORMERS_AVAILABLE:
256
+ raise AssertionError("xFormers is required for using nested tensors")
257
+ return self.forward_nested(x_or_x_list)
258
+ else:
259
+ raise AssertionError
src/lari/model/dinov2/layers/dino_head.py ADDED
@@ -0,0 +1,58 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.nn.init import trunc_normal_
9
+ from torch.nn.utils import weight_norm
10
+
11
+
12
+ class DINOHead(nn.Module):
13
+ def __init__(
14
+ self,
15
+ in_dim,
16
+ out_dim,
17
+ use_bn=False,
18
+ nlayers=3,
19
+ hidden_dim=2048,
20
+ bottleneck_dim=256,
21
+ mlp_bias=True,
22
+ ):
23
+ super().__init__()
24
+ nlayers = max(nlayers, 1)
25
+ self.mlp = _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=hidden_dim, use_bn=use_bn, bias=mlp_bias)
26
+ self.apply(self._init_weights)
27
+ self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False))
28
+ self.last_layer.weight_g.data.fill_(1)
29
+
30
+ def _init_weights(self, m):
31
+ if isinstance(m, nn.Linear):
32
+ trunc_normal_(m.weight, std=0.02)
33
+ if isinstance(m, nn.Linear) and m.bias is not None:
34
+ nn.init.constant_(m.bias, 0)
35
+
36
+ def forward(self, x):
37
+ x = self.mlp(x)
38
+ eps = 1e-6 if x.dtype == torch.float16 else 1e-12
39
+ x = nn.functional.normalize(x, dim=-1, p=2, eps=eps)
40
+ x = self.last_layer(x)
41
+ return x
42
+
43
+
44
+ def _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=None, use_bn=False, bias=True):
45
+ if nlayers == 1:
46
+ return nn.Linear(in_dim, bottleneck_dim, bias=bias)
47
+ else:
48
+ layers = [nn.Linear(in_dim, hidden_dim, bias=bias)]
49
+ if use_bn:
50
+ layers.append(nn.BatchNorm1d(hidden_dim))
51
+ layers.append(nn.GELU())
52
+ for _ in range(nlayers - 2):
53
+ layers.append(nn.Linear(hidden_dim, hidden_dim, bias=bias))
54
+ if use_bn:
55
+ layers.append(nn.BatchNorm1d(hidden_dim))
56
+ layers.append(nn.GELU())
57
+ layers.append(nn.Linear(hidden_dim, bottleneck_dim, bias=bias))
58
+ return nn.Sequential(*layers)
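
A quick, illustrative shape check for the projection head above (not part of the commit; the prototype count 65536 is only an example value):

```python
import torch

# DINOHead: MLP bottleneck, L2-normalisation, then a weight-normed prototype layer.
head = DINOHead(in_dim=384, out_dim=65536, nlayers=3, hidden_dim=2048, bottleneck_dim=256)
cls_tokens = torch.randn(8, 384)        # e.g. eight [CLS] embeddings from a ViT-S backbone
logits = head(cls_tokens)               # projected onto the prototype dimension
assert logits.shape == (8, 65536)
```
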
src/lari/model/dinov2/layers/drop_path.py ADDED
@@ -0,0 +1,34 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # References:
7
+ # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
8
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py
9
+
10
+
11
+ from torch import nn
12
+
13
+
14
+ def drop_path(x, drop_prob: float = 0.0, training: bool = False):
15
+ if drop_prob == 0.0 or not training:
16
+ return x
17
+ keep_prob = 1 - drop_prob
18
+ shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
19
+ random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
20
+ if keep_prob > 0.0:
21
+ random_tensor.div_(keep_prob)
22
+ output = x * random_tensor
23
+ return output
24
+
25
+
26
+ class DropPath(nn.Module):
27
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
28
+
29
+ def __init__(self, drop_prob=None):
30
+ super(DropPath, self).__init__()
31
+ self.drop_prob = drop_prob
32
+
33
+ def forward(self, x):
34
+ return drop_path(x, self.drop_prob, self.training)
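
For reference (not part of the commit), stochastic depth as implemented above is an identity at inference time and a per-sample drop-or-rescale during training:

```python
import torch

dp = DropPath(drop_prob=0.2)
x = torch.ones(4, 16, 384)              # (batch, tokens, dim)

dp.eval()
assert torch.equal(dp(x), x)            # no-op outside training

dp.train()
y = dp(x)                               # each sample is zeroed with p=0.2, otherwise scaled by 1/0.8
assert y.shape == x.shape
```
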
src/lari/model/dinov2/layers/layer_scale.py ADDED
@@ -0,0 +1,27 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110
7
+
8
+ from typing import Union
9
+
10
+ import torch
11
+ from torch import Tensor
12
+ from torch import nn
13
+
14
+
15
+ class LayerScale(nn.Module):
16
+ def __init__(
17
+ self,
18
+ dim: int,
19
+ init_values: Union[float, Tensor] = 1e-5,
20
+ inplace: bool = False,
21
+ ) -> None:
22
+ super().__init__()
23
+ self.inplace = inplace
24
+ self.gamma = nn.Parameter(init_values * torch.ones(dim))
25
+
26
+ def forward(self, x: Tensor) -> Tensor:
27
+ return x.mul_(self.gamma) if self.inplace else x * self.gamma
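
Illustrative usage (not part of the commit): with the default init_values, LayerScale starts as a strongly damped per-channel scaling, which is what lets very deep ViTs train stably.

```python
import torch

ls = LayerScale(dim=384, init_values=1e-5)
x = torch.randn(2, 197, 384)
assert torch.allclose(ls(x), x * 1e-5)  # per-channel gamma, all initialised to 1e-5
```
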
src/lari/model/dinov2/layers/mlp.py ADDED
@@ -0,0 +1,40 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # References:
7
+ # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
8
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py
9
+
10
+
11
+ from typing import Callable, Optional
12
+
13
+ from torch import Tensor, nn
14
+
15
+
16
+ class Mlp(nn.Module):
17
+ def __init__(
18
+ self,
19
+ in_features: int,
20
+ hidden_features: Optional[int] = None,
21
+ out_features: Optional[int] = None,
22
+ act_layer: Callable[..., nn.Module] = nn.GELU,
23
+ drop: float = 0.0,
24
+ bias: bool = True,
25
+ ) -> None:
26
+ super().__init__()
27
+ out_features = out_features or in_features
28
+ hidden_features = hidden_features or in_features
29
+ self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
30
+ self.act = act_layer()
31
+ self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
32
+ self.drop = nn.Dropout(drop)
33
+
34
+ def forward(self, x: Tensor) -> Tensor:
35
+ x = self.fc1(x)
36
+ x = self.act(x)
37
+ x = self.drop(x)
38
+ x = self.fc2(x)
39
+ x = self.drop(x)
40
+ return x
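
A minimal sketch of the feed-forward block above (illustrative only; the 4x expansion mirrors the usual mlp_ratio=4):

```python
import torch

ffn = Mlp(in_features=384, hidden_features=4 * 384)
x = torch.randn(2, 197, 384)
assert ffn(x).shape == (2, 197, 384)    # token dimensionality is preserved
```
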
src/lari/model/dinov2/layers/patch_embed.py ADDED
@@ -0,0 +1,88 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # References:
7
+ # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
8
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
9
+
10
+ from typing import Callable, Optional, Tuple, Union
11
+
12
+ from torch import Tensor
13
+ import torch.nn as nn
14
+
15
+
16
+ def make_2tuple(x):
17
+ if isinstance(x, tuple):
18
+ assert len(x) == 2
19
+ return x
20
+
21
+ assert isinstance(x, int)
22
+ return (x, x)
23
+
24
+
25
+ class PatchEmbed(nn.Module):
26
+ """
27
+ 2D image to patch embedding: (B,C,H,W) -> (B,N,D)
28
+
29
+ Args:
30
+ img_size: Image size.
31
+ patch_size: Patch token size.
32
+ in_chans: Number of input image channels.
33
+ embed_dim: Number of linear projection output channels.
34
+ norm_layer: Normalization layer.
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ img_size: Union[int, Tuple[int, int]] = 224,
40
+ patch_size: Union[int, Tuple[int, int]] = 16,
41
+ in_chans: int = 3,
42
+ embed_dim: int = 768,
43
+ norm_layer: Optional[Callable] = None,
44
+ flatten_embedding: bool = True,
45
+ ) -> None:
46
+ super().__init__()
47
+
48
+ image_HW = make_2tuple(img_size)
49
+ patch_HW = make_2tuple(patch_size)
50
+ patch_grid_size = (
51
+ image_HW[0] // patch_HW[0],
52
+ image_HW[1] // patch_HW[1],
53
+ )
54
+
55
+ self.img_size = image_HW
56
+ self.patch_size = patch_HW
57
+ self.patches_resolution = patch_grid_size
58
+ self.num_patches = patch_grid_size[0] * patch_grid_size[1]
59
+
60
+ self.in_chans = in_chans
61
+ self.embed_dim = embed_dim
62
+
63
+ self.flatten_embedding = flatten_embedding
64
+
65
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW)
66
+ self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
67
+
68
+ def forward(self, x: Tensor) -> Tensor:
69
+ _, _, H, W = x.shape
70
+ patch_H, patch_W = self.patch_size
71
+
72
+ assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}"
73
+ assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width {patch_W}"
74
+
75
+ x = self.proj(x) # B C H W
76
+ H, W = x.size(2), x.size(3)
77
+ x = x.flatten(2).transpose(1, 2) # B HW C
78
+ x = self.norm(x)
79
+ if not self.flatten_embedding:
80
+ x = x.reshape(-1, H, W, self.embed_dim) # B H W C
81
+ return x
82
+
83
+ def flops(self) -> float:
84
+ Ho, Wo = self.patches_resolution
85
+ flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
86
+ if self.norm is not None:
87
+ flops += Ho * Wo * self.embed_dim
88
+ return flops
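
Illustrative only (not part of the commit): patchifying a 224x224 image with the 14x14 patches used by the DINOv2 backbones in this repo.

```python
import torch

embed = PatchEmbed(img_size=224, patch_size=14, in_chans=3, embed_dim=384)
img = torch.randn(1, 3, 224, 224)
tokens = embed(img)                     # (B, N, D) with N = (224 / 14) ** 2 = 256
assert tokens.shape == (1, 256, 384)
```
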
src/lari/model/dinov2/layers/swiglu_ffn.py ADDED
@@ -0,0 +1,72 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ from typing import Callable, Optional
8
+ import warnings
9
+
10
+ from torch import Tensor, nn
11
+ import torch.nn.functional as F
12
+
13
+
14
+ class SwiGLUFFN(nn.Module):
15
+ def __init__(
16
+ self,
17
+ in_features: int,
18
+ hidden_features: Optional[int] = None,
19
+ out_features: Optional[int] = None,
20
+ act_layer: Callable[..., nn.Module] = None,
21
+ drop: float = 0.0,
22
+ bias: bool = True,
23
+ ) -> None:
24
+ super().__init__()
25
+ out_features = out_features or in_features
26
+ hidden_features = hidden_features or in_features
27
+ self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
28
+ self.w3 = nn.Linear(hidden_features, out_features, bias=bias)
29
+
30
+ def forward(self, x: Tensor) -> Tensor:
31
+ x12 = self.w12(x)
32
+ x1, x2 = x12.chunk(2, dim=-1)
33
+ hidden = F.silu(x1) * x2
34
+ return self.w3(hidden)
35
+
36
+
37
+ XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None
38
+ try:
39
+ if XFORMERS_ENABLED:
40
+ from xformers.ops import SwiGLU
41
+
42
+ XFORMERS_AVAILABLE = True
43
+ # warnings.warn("xFormers is available (SwiGLU)")
44
+ else:
45
+ # warnings.warn("xFormers is disabled (SwiGLU)")
46
+ raise ImportError
47
+ except ImportError:
48
+ SwiGLU = SwiGLUFFN
49
+ XFORMERS_AVAILABLE = False
50
+
51
+ # warnings.warn("xFormers is not available (SwiGLU)")
52
+
53
+
54
+ class SwiGLUFFNFused(SwiGLU):
55
+ def __init__(
56
+ self,
57
+ in_features: int,
58
+ hidden_features: Optional[int] = None,
59
+ out_features: Optional[int] = None,
60
+ act_layer: Callable[..., nn.Module] = None,
61
+ drop: float = 0.0,
62
+ bias: bool = True,
63
+ ) -> None:
64
+ out_features = out_features or in_features
65
+ hidden_features = hidden_features or in_features
66
+ hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8
67
+ super().__init__(
68
+ in_features=in_features,
69
+ hidden_features=hidden_features,
70
+ out_features=out_features,
71
+ bias=bias,
72
+ )
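
A small sketch of the pure-PyTorch fallback path (illustrative only; when xFormers is installed, SwiGLUFFNFused dispatches to the fused xformers.ops.SwiGLU instead):

```python
import torch

ffn = SwiGLUFFN(in_features=384, hidden_features=1024)
x = torch.randn(2, 197, 384)
assert ffn(x).shape == (2, 197, 384)
```
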
src/lari/model/dinov2/models/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import logging
7
+
8
+ from . import vision_transformer as vits
9
+
10
+
11
+ logger = logging.getLogger("dinov2")
12
+
13
+
14
+ def build_model(args, only_teacher=False, img_size=224):
15
+ args.arch = args.arch.removesuffix("_memeff")
16
+ if "vit" in args.arch:
17
+ vit_kwargs = dict(
18
+ img_size=img_size,
19
+ patch_size=args.patch_size,
20
+ init_values=args.layerscale,
21
+ ffn_layer=args.ffn_layer,
22
+ block_chunks=args.block_chunks,
23
+ qkv_bias=args.qkv_bias,
24
+ proj_bias=args.proj_bias,
25
+ ffn_bias=args.ffn_bias,
26
+ num_register_tokens=args.num_register_tokens,
27
+ interpolate_offset=args.interpolate_offset,
28
+ interpolate_antialias=args.interpolate_antialias,
29
+ )
30
+ teacher = vits.__dict__[args.arch](**vit_kwargs)
31
+ if only_teacher:
32
+ return teacher, teacher.embed_dim
33
+ student = vits.__dict__[args.arch](
34
+ **vit_kwargs,
35
+ drop_path_rate=args.drop_path_rate,
36
+ drop_path_uniform=args.drop_path_uniform,
37
+ )
38
+ embed_dim = student.embed_dim
39
+ return student, teacher, embed_dim
40
+
41
+
42
+ def build_model_from_cfg(cfg, only_teacher=False):
43
+ return build_model(cfg.student, only_teacher=only_teacher, img_size=cfg.crops.global_crops_size)
src/lari/model/dinov2/models/vision_transformer.py ADDED
@@ -0,0 +1,396 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ # References:
7
+ # https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
8
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
9
+
10
+ from functools import partial
11
+ import math
12
+ import logging
13
+ from typing import Sequence, Tuple, Union, Callable
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+ import torch.utils.checkpoint
18
+ from torch.nn.init import trunc_normal_
19
+
20
+ from ..layers import Mlp, PatchEmbed, SwiGLUFFNFused, MemEffAttention, NestedTensorBlock as Block
21
+
22
+
23
+ logger = logging.getLogger("dinov2")
24
+
25
+
26
+ def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
27
+ if not depth_first and include_root:
28
+ fn(module=module, name=name)
29
+ for child_name, child_module in module.named_children():
30
+ child_name = ".".join((name, child_name)) if name else child_name
31
+ named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
32
+ if depth_first and include_root:
33
+ fn(module=module, name=name)
34
+ return module
35
+
36
+
37
+ class BlockChunk(nn.ModuleList):
38
+ def forward(self, x):
39
+ for b in self:
40
+ x = b(x)
41
+ return x
42
+
43
+
44
+ class DinoVisionTransformer(nn.Module):
45
+ def __init__(
46
+ self,
47
+ img_size=224,
48
+ patch_size=16,
49
+ in_chans=3,
50
+ embed_dim=768,
51
+ depth=12,
52
+ num_heads=12,
53
+ mlp_ratio=4.0,
54
+ qkv_bias=True,
55
+ ffn_bias=True,
56
+ proj_bias=True,
57
+ drop_path_rate=0.0,
58
+ drop_path_uniform=False,
59
+ init_values=None, # for layerscale: None or 0 => no layerscale
60
+ embed_layer=PatchEmbed,
61
+ act_layer=nn.GELU,
62
+ block_fn=Block,
63
+ ffn_layer="mlp",
64
+ block_chunks=1,
65
+ num_register_tokens=0,
66
+ interpolate_antialias=False,
67
+ interpolate_offset=0.1,
68
+ ):
69
+ """
70
+ Args:
71
+ img_size (int, tuple): input image size
72
+ patch_size (int, tuple): patch size
73
+ in_chans (int): number of input channels
74
+ embed_dim (int): embedding dimension
75
+ depth (int): depth of transformer
76
+ num_heads (int): number of attention heads
77
+ mlp_ratio (int): ratio of mlp hidden dim to embedding dim
78
+ qkv_bias (bool): enable bias for qkv if True
79
+ proj_bias (bool): enable bias for proj in attn if True
80
+ ffn_bias (bool): enable bias for ffn if True
81
+ drop_path_rate (float): stochastic depth rate
82
+ drop_path_uniform (bool): apply uniform drop rate across blocks
83
+ weight_init (str): weight init scheme
84
+ init_values (float): layer-scale init values
85
+ embed_layer (nn.Module): patch embedding layer
86
+ act_layer (nn.Module): MLP activation layer
87
+ block_fn (nn.Module): transformer block class
88
+ ffn_layer (str): "mlp", "swiglu", "swiglufused" or "identity"
89
+ block_chunks: (int) split block sequence into block_chunks units for FSDP wrap
90
+ num_register_tokens: (int) number of extra cls tokens (so-called "registers")
91
+ interpolate_antialias: (bool) flag to apply anti-aliasing when interpolating positional embeddings
92
+ interpolate_offset: (float) work-around offset to apply when interpolating positional embeddings
93
+ """
94
+ super().__init__()
95
+ norm_layer = partial(nn.LayerNorm, eps=1e-6)
96
+
97
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
98
+ self.num_tokens = 1
99
+ self.n_blocks = depth
100
+ self.num_heads = num_heads
101
+ self.patch_size = patch_size
102
+ self.num_register_tokens = num_register_tokens
103
+ self.interpolate_antialias = interpolate_antialias
104
+ self.interpolate_offset = interpolate_offset
105
+
106
+ self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
107
+ num_patches = self.patch_embed.num_patches
108
+
109
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
110
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
111
+ assert num_register_tokens >= 0
112
+ self.register_tokens = (
113
+ nn.Parameter(torch.zeros(1, num_register_tokens, embed_dim)) if num_register_tokens else None
114
+ )
115
+
116
+ if drop_path_uniform is True:
117
+ dpr = [drop_path_rate] * depth
118
+ else:
119
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
120
+
121
+ if ffn_layer == "mlp":
122
+ logger.info("using MLP layer as FFN")
123
+ ffn_layer = Mlp
124
+ elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
125
+ logger.info("using SwiGLU layer as FFN")
126
+ ffn_layer = SwiGLUFFNFused
127
+ elif ffn_layer == "identity":
128
+ logger.info("using Identity layer as FFN")
129
+
130
+ def f(*args, **kwargs):
131
+ return nn.Identity()
132
+
133
+ ffn_layer = f
134
+ else:
135
+ raise NotImplementedError
136
+
137
+ blocks_list = [
138
+ block_fn(
139
+ dim=embed_dim,
140
+ num_heads=num_heads,
141
+ mlp_ratio=mlp_ratio,
142
+ qkv_bias=qkv_bias,
143
+ proj_bias=proj_bias,
144
+ ffn_bias=ffn_bias,
145
+ drop_path=dpr[i],
146
+ norm_layer=norm_layer,
147
+ act_layer=act_layer,
148
+ ffn_layer=ffn_layer,
149
+ init_values=init_values,
150
+ )
151
+ for i in range(depth)
152
+ ]
153
+ if block_chunks > 0:
154
+ self.chunked_blocks = True
155
+ chunked_blocks = []
156
+ chunksize = depth // block_chunks
157
+ for i in range(0, depth, chunksize):
158
+ # this is to keep the block index consistent if we chunk the block list
159
+ chunked_blocks.append([nn.Identity()] * i + blocks_list[i : i + chunksize])
160
+ self.blocks = nn.ModuleList([BlockChunk(p) for p in chunked_blocks])
161
+ else:
162
+ self.chunked_blocks = False
163
+ self.blocks = nn.ModuleList(blocks_list)
164
+
165
+ self.norm = norm_layer(embed_dim)
166
+ self.head = nn.Identity()
167
+
168
+ self.mask_token = nn.Parameter(torch.zeros(1, embed_dim))
169
+
170
+ self.init_weights()
171
+
172
+ def init_weights(self):
173
+ trunc_normal_(self.pos_embed, std=0.02)
174
+ nn.init.normal_(self.cls_token, std=1e-6)
175
+ if self.register_tokens is not None:
176
+ nn.init.normal_(self.register_tokens, std=1e-6)
177
+ named_apply(init_weights_vit_timm, self)
178
+
179
+ def interpolate_pos_encoding(self, x, w, h):
180
+ previous_dtype = x.dtype
181
+ npatch = x.shape[1] - 1
182
+ N = self.pos_embed.shape[1] - 1
183
+ if npatch == N and w == h:
184
+ return self.pos_embed
185
+ pos_embed = self.pos_embed.float()
186
+ class_pos_embed = pos_embed[:, 0]
187
+ patch_pos_embed = pos_embed[:, 1:]
188
+ dim = x.shape[-1]
189
+ w0 = w // self.patch_size
190
+ h0 = h // self.patch_size
191
+ M = int(math.sqrt(N)) # Recover the number of patches in each dimension
192
+ assert N == M * M
193
+ kwargs = {}
194
+ if self.interpolate_offset:
195
+ # Historical kludge: add a small number to avoid floating point error in the interpolation, see https://github.com/facebookresearch/dino/issues/8
196
+ # Note: still needed for backward-compatibility, the underlying operators are using both output size and scale factors
197
+ sx = float(w0 + self.interpolate_offset) / M
198
+ sy = float(h0 + self.interpolate_offset) / M
199
+ kwargs["scale_factor"] = (sx, sy)
200
+ else:
201
+ # Simply specify an output size instead of a scale factor
202
+ kwargs["size"] = (w0, h0)
203
+ patch_pos_embed = nn.functional.interpolate(
204
+ patch_pos_embed.reshape(1, M, M, dim).permute(0, 3, 1, 2),
205
+ mode="bicubic",
206
+ antialias=self.interpolate_antialias,
207
+ **kwargs,
208
+ )
209
+ assert (w0, h0) == patch_pos_embed.shape[-2:]
210
+ patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
211
+ return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype)
212
+
213
+ def prepare_tokens_with_masks(self, x, masks=None):
214
+ B, nc, w, h = x.shape
215
+ x = self.patch_embed(x)
216
+ if masks is not None:
217
+ x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x)
218
+
219
+ x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
220
+ x = x + self.interpolate_pos_encoding(x, w, h)
221
+
222
+ if self.register_tokens is not None:
223
+ x = torch.cat(
224
+ (
225
+ x[:, :1],
226
+ self.register_tokens.expand(x.shape[0], -1, -1),
227
+ x[:, 1:],
228
+ ),
229
+ dim=1,
230
+ )
231
+
232
+ return x
233
+
234
+ def forward_features_list(self, x_list, masks_list):
235
+ x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
236
+ for blk in self.blocks:
237
+ x = blk(x)
238
+
239
+ all_x = x
240
+ output = []
241
+ for x, masks in zip(all_x, masks_list):
242
+ x_norm = self.norm(x)
243
+ output.append(
244
+ {
245
+ "x_norm_clstoken": x_norm[:, 0],
246
+ "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
247
+ "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
248
+ "x_prenorm": x,
249
+ "masks": masks,
250
+ }
251
+ )
252
+ return output
253
+
254
+ def forward_features(self, x, masks=None):
255
+ if isinstance(x, list):
256
+ return self.forward_features_list(x, masks)
257
+
258
+ x = self.prepare_tokens_with_masks(x, masks)
259
+
260
+ for blk in self.blocks:
261
+ x = blk(x)
262
+
263
+ x_norm = self.norm(x)
264
+ return {
265
+ "x_norm_clstoken": x_norm[:, 0],
266
+ "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
267
+ "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
268
+ "x_prenorm": x,
269
+ "masks": masks,
270
+ }
271
+
272
+ def _get_intermediate_layers_not_chunked(self, x, n=1):
273
+ x = self.prepare_tokens_with_masks(x)
274
+ # If n is an int, take the n last blocks. If it's a list, take them
275
+ output, total_block_len = [], len(self.blocks)
276
+ blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
277
+ for i, blk in enumerate(self.blocks):
278
+ x = blk(x)
279
+ if i in blocks_to_take:
280
+ output.append(x)
281
+ assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
282
+ return output
283
+
284
+ def _get_intermediate_layers_chunked(self, x, n=1):
285
+ x = self.prepare_tokens_with_masks(x)
286
+ output, i, total_block_len = [], 0, len(self.blocks[-1])
287
+ # If n is an int, take the n last blocks. If it's a list, take them
288
+ blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
289
+ for block_chunk in self.blocks:
290
+ for blk in block_chunk[i:]: # Passing the nn.Identity()
291
+ x = blk(x)
292
+ if i in blocks_to_take:
293
+ output.append(x)
294
+ i += 1
295
+ assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
296
+ return output
297
+
298
+ def get_intermediate_layers(
299
+ self,
300
+ x: torch.Tensor,
301
+ n: Union[int, Sequence] = 1, # Layers or n last layers to take
302
+ reshape: bool = False,
303
+ return_class_token: bool = False,
304
+ norm=True,
305
+ ) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
306
+ if self.chunked_blocks:
307
+ outputs = self._get_intermediate_layers_chunked(x, n)
308
+ else:
309
+ outputs = self._get_intermediate_layers_not_chunked(x, n)
310
+ if norm:
311
+ outputs = [self.norm(out) for out in outputs]
312
+ class_tokens = [out[:, 0] for out in outputs]
313
+ outputs = [out[:, 1 + self.num_register_tokens :] for out in outputs]
314
+ if reshape:
315
+ B, _, w, h = x.shape
316
+ outputs = [
317
+ out.reshape(B, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous()
318
+ for out in outputs
319
+ ]
320
+ if return_class_token:
321
+ return tuple(zip(outputs, class_tokens))
322
+ return tuple(outputs)
323
+
324
+ def forward(self, *args, is_training=False, **kwargs):
325
+ ret = self.forward_features(*args, **kwargs)
326
+ if is_training:
327
+ return ret
328
+ else:
329
+ return self.head(ret["x_norm_clstoken"])
330
+
331
+
332
+ def init_weights_vit_timm(module: nn.Module, name: str = ""):
333
+ """ViT weight initialization, original timm impl (for reproducibility)"""
334
+ if isinstance(module, nn.Linear):
335
+ trunc_normal_(module.weight, std=0.02)
336
+ if module.bias is not None:
337
+ nn.init.zeros_(module.bias)
338
+
339
+
340
+ def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
341
+ model = DinoVisionTransformer(
342
+ patch_size=patch_size,
343
+ embed_dim=384,
344
+ depth=12,
345
+ num_heads=6,
346
+ mlp_ratio=4,
347
+ block_fn=partial(Block, attn_class=MemEffAttention),
348
+ num_register_tokens=num_register_tokens,
349
+ **kwargs,
350
+ )
351
+ return model
352
+
353
+
354
+ def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
355
+ model = DinoVisionTransformer(
356
+ patch_size=patch_size,
357
+ embed_dim=768,
358
+ depth=12,
359
+ num_heads=12,
360
+ mlp_ratio=4,
361
+ block_fn=partial(Block, attn_class=MemEffAttention),
362
+ num_register_tokens=num_register_tokens,
363
+ **kwargs,
364
+ )
365
+ return model
366
+
367
+
368
+ def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
369
+ model = DinoVisionTransformer(
370
+ patch_size=patch_size,
371
+ embed_dim=1024,
372
+ depth=24,
373
+ num_heads=16,
374
+ mlp_ratio=4,
375
+ block_fn=partial(Block, attn_class=MemEffAttention),
376
+ num_register_tokens=num_register_tokens,
377
+ **kwargs,
378
+ )
379
+ return model
380
+
381
+
382
+ def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
383
+ """
384
+ Close to ViT-giant, with embed-dim 1536 and 24 heads => embed-dim per head 64
385
+ """
386
+ model = DinoVisionTransformer(
387
+ patch_size=patch_size,
388
+ embed_dim=1536,
389
+ depth=40,
390
+ num_heads=24,
391
+ mlp_ratio=4,
392
+ block_fn=partial(Block, attn_class=MemEffAttention),
393
+ num_register_tokens=num_register_tokens,
394
+ **kwargs,
395
+ )
396
+ return model
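
Illustrative only (not part of the commit): this is roughly how the LaRI code queries the backbone, shown here with a randomly initialised ViT-S/14 and hypothetical sizes.

```python
import torch

vit = vit_small(patch_size=14, img_size=224, block_chunks=0, init_values=1.0)
vit.eval()

img = torch.randn(1, 3, 224, 224)       # H and W must be multiples of the patch size
with torch.no_grad():
    (patch_tokens, cls_token), = vit.get_intermediate_layers(img, n=1, return_class_token=True)
print(patch_tokens.shape)               # (1, 256, 384): a 16x16 patch grid
print(cls_token.shape)                  # (1, 384)
```
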
src/lari/model/dinov2/utils/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
src/lari/model/dinov2/utils/cluster.py ADDED
@@ -0,0 +1,95 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from enum import Enum
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Optional
10
+
11
+
12
+ class ClusterType(Enum):
13
+ AWS = "aws"
14
+ FAIR = "fair"
15
+ RSC = "rsc"
16
+
17
+
18
+ def _guess_cluster_type() -> ClusterType:
19
+ uname = os.uname()
20
+ if uname.sysname == "Linux":
21
+ if uname.release.endswith("-aws"):
22
+ # Linux kernel versions on AWS instances are of the form "5.4.0-1051-aws"
23
+ return ClusterType.AWS
24
+ elif uname.nodename.startswith("rsc"):
25
+ # Linux kernel versions on RSC instances are standard ones but hostnames start with "rsc"
26
+ return ClusterType.RSC
27
+
28
+ return ClusterType.FAIR
29
+
30
+
31
+ def get_cluster_type(cluster_type: Optional[ClusterType] = None) -> Optional[ClusterType]:
32
+ if cluster_type is None:
33
+ return _guess_cluster_type()
34
+
35
+ return cluster_type
36
+
37
+
38
+ def get_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]:
39
+ cluster_type = get_cluster_type(cluster_type)
40
+ if cluster_type is None:
41
+ return None
42
+
43
+ CHECKPOINT_DIRNAMES = {
44
+ ClusterType.AWS: "checkpoints",
45
+ ClusterType.FAIR: "checkpoint",
46
+ ClusterType.RSC: "checkpoint/dino",
47
+ }
48
+ return Path("/") / CHECKPOINT_DIRNAMES[cluster_type]
49
+
50
+
51
+ def get_user_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]:
52
+ checkpoint_path = get_checkpoint_path(cluster_type)
53
+ if checkpoint_path is None:
54
+ return None
55
+
56
+ username = os.environ.get("USER")
57
+ assert username is not None
58
+ return checkpoint_path / username
59
+
60
+
61
+ def get_slurm_partition(cluster_type: Optional[ClusterType] = None) -> Optional[str]:
62
+ cluster_type = get_cluster_type(cluster_type)
63
+ if cluster_type is None:
64
+ return None
65
+
66
+ SLURM_PARTITIONS = {
67
+ ClusterType.AWS: "learnlab",
68
+ ClusterType.FAIR: "learnlab",
69
+ ClusterType.RSC: "learn",
70
+ }
71
+ return SLURM_PARTITIONS[cluster_type]
72
+
73
+
74
+ def get_slurm_executor_parameters(
75
+ nodes: int, num_gpus_per_node: int, cluster_type: Optional[ClusterType] = None, **kwargs
76
+ ) -> Dict[str, Any]:
77
+ # create default parameters
78
+ params = {
79
+ "mem_gb": 0, # Requests all memory on a node, see https://slurm.schedmd.com/sbatch.html
80
+ "gpus_per_node": num_gpus_per_node,
81
+ "tasks_per_node": num_gpus_per_node, # one task per GPU
82
+ "cpus_per_task": 10,
83
+ "nodes": nodes,
84
+ "slurm_partition": get_slurm_partition(cluster_type),
85
+ }
86
+ # apply cluster-specific adjustments
87
+ cluster_type = get_cluster_type(cluster_type)
88
+ if cluster_type == ClusterType.AWS:
89
+ params["cpus_per_task"] = 12
90
+ del params["mem_gb"]
91
+ elif cluster_type == ClusterType.RSC:
92
+ params["cpus_per_task"] = 12
93
+ # set additional parameters / apply overrides
94
+ params.update(kwargs)
95
+ return params
src/lari/model/dinov2/utils/config.py ADDED
@@ -0,0 +1,72 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import math
7
+ import logging
8
+ import os
9
+
10
+ from omegaconf import OmegaConf
11
+
12
+ import dinov2.distributed as distributed
13
+ from dinov2.logging import setup_logging
14
+ from dinov2.utils import utils
15
+ from dinov2.configs import dinov2_default_config
16
+
17
+
18
+ logger = logging.getLogger("dinov2")
19
+
20
+
21
+ def apply_scaling_rules_to_cfg(cfg): # to fix
22
+ if cfg.optim.scaling_rule == "sqrt_wrt_1024":
23
+ base_lr = cfg.optim.base_lr
24
+ cfg.optim.lr = base_lr
25
+ cfg.optim.lr *= math.sqrt(cfg.train.batch_size_per_gpu * distributed.get_global_size() / 1024.0)
26
+ logger.info(f"sqrt scaling learning rate; base: {base_lr}, new: {cfg.optim.lr}")
27
+ else:
28
+ raise NotImplementedError
29
+ return cfg
30
+
31
+
32
+ def write_config(cfg, output_dir, name="config.yaml"):
33
+ logger.info(OmegaConf.to_yaml(cfg))
34
+ saved_cfg_path = os.path.join(output_dir, name)
35
+ with open(saved_cfg_path, "w") as f:
36
+ OmegaConf.save(config=cfg, f=f)
37
+ return saved_cfg_path
38
+
39
+
40
+ def get_cfg_from_args(args):
41
+ args.output_dir = os.path.abspath(args.output_dir)
42
+ args.opts += [f"train.output_dir={args.output_dir}"]
43
+ default_cfg = OmegaConf.create(dinov2_default_config)
44
+ cfg = OmegaConf.load(args.config_file)
45
+ cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))
46
+ return cfg
47
+
48
+
49
+ def default_setup(args):
50
+ distributed.enable(overwrite=True)
51
+ seed = getattr(args, "seed", 0)
52
+ rank = distributed.get_global_rank()
53
+
54
+ global logger
55
+ setup_logging(output=args.output_dir, level=logging.INFO)
56
+ logger = logging.getLogger("dinov2")
57
+
58
+ utils.fix_random_seeds(seed + rank)
59
+ logger.info("git:\n {}\n".format(utils.get_sha()))
60
+ logger.info("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
61
+
62
+
63
+ def setup(args):
64
+ """
65
+ Create configs and perform basic setups.
66
+ """
67
+ cfg = get_cfg_from_args(args)
68
+ os.makedirs(args.output_dir, exist_ok=True)
69
+ default_setup(args)
70
+ apply_scaling_rules_to_cfg(cfg)
71
+ write_config(cfg, args.output_dir)
72
+ return cfg
src/lari/model/dinov2/utils/dtype.py ADDED
@@ -0,0 +1,37 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+
7
+ from typing import Dict, Union
8
+
9
+ import numpy as np
10
+ import torch
11
+
12
+
13
+ TypeSpec = Union[str, np.dtype, torch.dtype]
14
+
15
+
16
+ _NUMPY_TO_TORCH_DTYPE: Dict[np.dtype, torch.dtype] = {
17
+ np.dtype("bool"): torch.bool,
18
+ np.dtype("uint8"): torch.uint8,
19
+ np.dtype("int8"): torch.int8,
20
+ np.dtype("int16"): torch.int16,
21
+ np.dtype("int32"): torch.int32,
22
+ np.dtype("int64"): torch.int64,
23
+ np.dtype("float16"): torch.float16,
24
+ np.dtype("float32"): torch.float32,
25
+ np.dtype("float64"): torch.float64,
26
+ np.dtype("complex64"): torch.complex64,
27
+ np.dtype("complex128"): torch.complex128,
28
+ }
29
+
30
+
31
+ def as_torch_dtype(dtype: TypeSpec) -> torch.dtype:
32
+ if isinstance(dtype, torch.dtype):
33
+ return dtype
34
+ if isinstance(dtype, str):
35
+ dtype = np.dtype(dtype)
36
+ assert isinstance(dtype, np.dtype), f"Expected an instance of numpy dtype, got {type(dtype)}"
37
+ return _NUMPY_TO_TORCH_DTYPE[dtype]
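
Illustrative only: the helper above normalises string, NumPy, and torch dtype specs onto torch dtypes.

```python
import numpy as np
import torch

assert as_torch_dtype("float32") is torch.float32
assert as_torch_dtype(np.dtype("int64")) is torch.int64
assert as_torch_dtype(torch.bfloat16) is torch.bfloat16   # torch dtypes pass through unchanged
```
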
src/lari/model/dinov2/utils/param_groups.py ADDED
@@ -0,0 +1,103 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from collections import defaultdict
7
+ import logging
8
+
9
+
10
+ logger = logging.getLogger("dinov2")
11
+
12
+
13
+ def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12, force_is_backbone=False, chunked_blocks=False):
14
+ """
15
+ Calculate lr decay rate for different ViT blocks.
16
+ Args:
17
+ name (string): parameter name.
18
+ lr_decay_rate (float): base lr decay rate.
19
+ num_layers (int): number of ViT blocks.
20
+ Returns:
21
+ lr decay rate for the given parameter.
22
+ """
23
+ layer_id = num_layers + 1
24
+ if name.startswith("backbone") or force_is_backbone:
25
+ if (
26
+ ".pos_embed" in name
27
+ or ".patch_embed" in name
28
+ or ".mask_token" in name
29
+ or ".cls_token" in name
30
+ or ".register_tokens" in name
31
+ ):
32
+ layer_id = 0
33
+ elif force_is_backbone and (
34
+ "pos_embed" in name
35
+ or "patch_embed" in name
36
+ or "mask_token" in name
37
+ or "cls_token" in name
38
+ or "register_tokens" in name
39
+ ):
40
+ layer_id = 0
41
+ elif ".blocks." in name and ".residual." not in name:
42
+ layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1
43
+ elif chunked_blocks and "blocks." in name and "residual." not in name:
44
+ layer_id = int(name[name.find("blocks.") :].split(".")[2]) + 1
45
+ elif "blocks." in name and "residual." not in name:
46
+ layer_id = int(name[name.find("blocks.") :].split(".")[1]) + 1
47
+
48
+ return lr_decay_rate ** (num_layers + 1 - layer_id)
49
+
50
+
51
+ def get_params_groups_with_decay(model, lr_decay_rate=1.0, patch_embed_lr_mult=1.0):
52
+ chunked_blocks = False
53
+ if hasattr(model, "n_blocks"):
54
+ logger.info("chunked fsdp")
55
+ n_blocks = model.n_blocks
56
+ chunked_blocks = model.chunked_blocks
57
+ elif hasattr(model, "blocks"):
58
+ logger.info("first code branch")
59
+ n_blocks = len(model.blocks)
60
+ elif hasattr(model, "backbone"):
61
+ logger.info("second code branch")
62
+ n_blocks = len(model.backbone.blocks)
63
+ else:
64
+ logger.info("else code branch")
65
+ n_blocks = 0
66
+ all_param_groups = []
67
+
68
+ for name, param in model.named_parameters():
69
+ name = name.replace("_fsdp_wrapped_module.", "")
70
+ if not param.requires_grad:
71
+ continue
72
+ decay_rate = get_vit_lr_decay_rate(
73
+ name, lr_decay_rate, num_layers=n_blocks, force_is_backbone=n_blocks > 0, chunked_blocks=chunked_blocks
74
+ )
75
+ d = {"params": param, "is_last_layer": False, "lr_multiplier": decay_rate, "wd_multiplier": 1.0, "name": name}
76
+
77
+ if "last_layer" in name:
78
+ d.update({"is_last_layer": True})
79
+
80
+ if name.endswith(".bias") or "norm" in name or "gamma" in name:
81
+ d.update({"wd_multiplier": 0.0})
82
+
83
+ if "patch_embed" in name:
84
+ d.update({"lr_multiplier": d["lr_multiplier"] * patch_embed_lr_mult})
85
+
86
+ all_param_groups.append(d)
87
+ logger.info(f"""{name}: lr_multiplier: {d["lr_multiplier"]}, wd_multiplier: {d["wd_multiplier"]}""")
88
+
89
+ return all_param_groups
90
+
91
+
92
+ def fuse_params_groups(all_params_groups, keys=("lr_multiplier", "wd_multiplier", "is_last_layer")):
93
+ fused_params_groups = defaultdict(lambda: {"params": []})
94
+ for d in all_params_groups:
95
+ identifier = ""
96
+ for k in keys:
97
+ identifier += k + str(d[k]) + "_"
98
+
99
+ for k in keys:
100
+ fused_params_groups[identifier][k] = d[k]
101
+ fused_params_groups[identifier]["params"].append(d["params"])
102
+
103
+ return fused_params_groups.values()
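
Illustrative only (hypothetical parameter names): the decay rate grows with block index, so the earliest layers get the smallest learning-rate multiplier while non-backbone parameters keep the full rate.

```python
r_embed = get_vit_lr_decay_rate("backbone.patch_embed.proj.weight", lr_decay_rate=0.9, num_layers=12)
r_block0 = get_vit_lr_decay_rate("backbone.blocks.0.attn.qkv.weight", lr_decay_rate=0.9, num_layers=12)
r_head = get_vit_lr_decay_rate("head.weight", lr_decay_rate=0.9, num_layers=12)
print(r_embed, r_block0, r_head)        # 0.9**13, 0.9**12, 1.0
```
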
src/lari/model/dinov2/utils/utils.py ADDED
@@ -0,0 +1,95 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ import os
8
+ import random
9
+ import subprocess
10
+ from urllib.parse import urlparse
11
+
12
+ import numpy as np
13
+ import torch
14
+ from torch import nn
15
+
16
+
17
+ logger = logging.getLogger("dinov2")
18
+
19
+
20
+ def load_pretrained_weights(model, pretrained_weights, checkpoint_key):
21
+ if urlparse(pretrained_weights).scheme: # If it looks like an URL
22
+ state_dict = torch.hub.load_state_dict_from_url(pretrained_weights, map_location="cpu")
23
+ else:
24
+ state_dict = torch.load(pretrained_weights, map_location="cpu")
25
+ if checkpoint_key is not None and checkpoint_key in state_dict:
26
+ logger.info(f"Take key {checkpoint_key} in provided checkpoint dict")
27
+ state_dict = state_dict[checkpoint_key]
28
+ # remove `module.` prefix
29
+ state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
30
+ # remove `backbone.` prefix induced by multicrop wrapper
31
+ state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()}
32
+ msg = model.load_state_dict(state_dict, strict=False)
33
+ logger.info("Pretrained weights found at {} and loaded with msg: {}".format(pretrained_weights, msg))
34
+
35
+
36
+ def fix_random_seeds(seed=31):
37
+ """
38
+ Fix random seeds.
39
+ """
40
+ torch.manual_seed(seed)
41
+ torch.cuda.manual_seed_all(seed)
42
+ np.random.seed(seed)
43
+ random.seed(seed)
44
+
45
+
46
+ def get_sha():
47
+ cwd = os.path.dirname(os.path.abspath(__file__))
48
+
49
+ def _run(command):
50
+ return subprocess.check_output(command, cwd=cwd).decode("ascii").strip()
51
+
52
+ sha = "N/A"
53
+ diff = "clean"
54
+ branch = "N/A"
55
+ try:
56
+ sha = _run(["git", "rev-parse", "HEAD"])
57
+ subprocess.check_output(["git", "diff"], cwd=cwd)
58
+ diff = _run(["git", "diff-index", "HEAD"])
59
+ diff = "has uncommitted changes" if diff else "clean"
60
+ branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
61
+ except Exception:
62
+ pass
63
+ message = f"sha: {sha}, status: {diff}, branch: {branch}"
64
+ return message
65
+
66
+
67
+ class CosineScheduler(object):
68
+ def __init__(self, base_value, final_value, total_iters, warmup_iters=0, start_warmup_value=0, freeze_iters=0):
69
+ super().__init__()
70
+ self.final_value = final_value
71
+ self.total_iters = total_iters
72
+
73
+ freeze_schedule = np.zeros((freeze_iters))
74
+
75
+ warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters)
76
+
77
+ iters = np.arange(total_iters - warmup_iters - freeze_iters)
78
+ schedule = final_value + 0.5 * (base_value - final_value) * (1 + np.cos(np.pi * iters / len(iters)))
79
+ self.schedule = np.concatenate((freeze_schedule, warmup_schedule, schedule))
80
+
81
+ assert len(self.schedule) == self.total_iters
82
+
83
+ def __getitem__(self, it):
84
+ if it >= self.total_iters:
85
+ return self.final_value
86
+ else:
87
+ return self.schedule[it]
88
+
89
+
90
+ def has_batchnorms(model):
91
+ bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)
92
+ for name, module in model.named_modules():
93
+ if isinstance(module, bn_types):
94
+ return True
95
+ return False
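
Illustrative only (hypothetical numbers): the scheduler above indexes like an array and clamps to final_value past total_iters.

```python
sched = CosineScheduler(base_value=1e-3, final_value=1e-5, total_iters=1000, warmup_iters=100)
print(sched[0])      # 0.0   -> start of the linear warmup
print(sched[100])    # 1e-3  -> warmup done, cosine decay starts from the base value
print(sched[2000])   # 1e-5  -> clamped to final_value beyond total_iters
```
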
src/lari/model/dpt_seg_head.py ADDED
@@ -0,0 +1,158 @@
1
+ '''
2
+ This code is adapted from Depth Anything and DPT.
3
+ '''
4
+ from src.lari.model.blocks import FeatureFusionBlock, _make_scratch
5
+ import cv2
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ from torchvision.transforms import Compose
10
+
11
+
12
+
13
+
14
+ def _make_fusion_block(features, use_bn, size=None):
15
+ return FeatureFusionBlock(
16
+ features,
17
+ nn.ReLU(False),
18
+ deconv=False,
19
+ bn=use_bn,
20
+ expand=False,
21
+ align_corners=True,
22
+ size=size,
23
+ )
24
+
25
+
26
+ class ConvBlock(nn.Module):
27
+ def __init__(self, in_feature, out_feature):
28
+ super().__init__()
29
+
30
+ self.conv_block = nn.Sequential(
31
+ nn.Conv2d(in_feature, out_feature, kernel_size=3, stride=1, padding=1),
32
+ nn.BatchNorm2d(out_feature),
33
+ nn.ReLU(True)
34
+ )
35
+
36
+ def forward(self, x):
37
+ return self.conv_block(x)
38
+
39
+
40
+ class DPTSegHead(nn.Module):
41
+ def __init__(
42
+ self,
43
+ in_channels,
44
+ features=256,
45
+ use_bn=False,
46
+ out_channels=[256, 512, 1024, 1024],
47
+ use_clstoken=False,
48
+ num_classes = 5,
49
+ output_type = "ray_stop" # "seg_sep"
50
+ ):
51
+ super(DPTSegHead, self).__init__()
52
+
53
+ self.use_clstoken = use_clstoken
54
+ self.output_type = output_type
55
+
56
+ # output one extra class (index 0) to mark invalid ray-stopping points
57
+ self.num_classes = num_classes + 1 if self.output_type == "ray_stop" else num_classes
58
+
59
+
60
+ self.projects = nn.ModuleList([
61
+ nn.Conv2d(
62
+ in_channels=in_channels,
63
+ out_channels=out_channel,
64
+ kernel_size=1,
65
+ stride=1,
66
+ padding=0,
67
+ ) for out_channel in out_channels
68
+ ])
69
+
70
+ self.resize_layers = nn.ModuleList([
71
+ nn.ConvTranspose2d(
72
+ in_channels=out_channels[0],
73
+ out_channels=out_channels[0],
74
+ kernel_size=4,
75
+ stride=4,
76
+ padding=0),
77
+ nn.ConvTranspose2d(
78
+ in_channels=out_channels[1],
79
+ out_channels=out_channels[1],
80
+ kernel_size=2,
81
+ stride=2,
82
+ padding=0),
83
+ nn.Identity(),
84
+ nn.Conv2d(
85
+ in_channels=out_channels[3],
86
+ out_channels=out_channels[3],
87
+ kernel_size=3,
88
+ stride=2,
89
+ padding=1)
90
+ ])
91
+
92
+ if use_clstoken:
93
+ self.readout_projects = nn.ModuleList()
94
+ for _ in range(len(self.projects)):
95
+ self.readout_projects.append(
96
+ nn.Sequential(
97
+ nn.Linear(2 * in_channels, in_channels),
98
+ nn.GELU()))
99
+
100
+ self.scratch = _make_scratch(
101
+ out_channels,
102
+ features,
103
+ groups=1,
104
+ expand=False,
105
+ )
106
+
107
+ self.scratch.stem_transpose = None
108
+
109
+ self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
110
+ self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
111
+ self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
112
+ self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
113
+
114
+ self.scratch.output_conv1 = nn.Sequential(
115
+ nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
116
+ nn.BatchNorm2d(features),
117
+ nn.ReLU(True),
118
+ nn.Dropout(0.1, False),
119
+ nn.Conv2d(features, self.num_classes, kernel_size=1),
120
+ )
121
+
122
+
123
+
124
+ def forward(self, out_features, patch_h, patch_w):
125
+ out = []
126
+ for i, x in enumerate(out_features):
127
+ if self.use_clstoken:
128
+ x, cls_token = x[0], x[1]
129
+ readout = cls_token.unsqueeze(1).expand_as(x)
130
+ x = self.readout_projects[i](torch.cat((x, readout), -1))
131
+ else:
132
+ x = x[0]
133
+
134
+ x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))
135
+
136
+ x = self.projects[i](x)
137
+ x = self.resize_layers[i](x)
138
+
139
+ out.append(x)
140
+
141
+ layer_1, layer_2, layer_3, layer_4 = out
142
+
143
+ layer_1_rn = self.scratch.layer1_rn(layer_1)
144
+ layer_2_rn = self.scratch.layer2_rn(layer_2)
145
+ layer_3_rn = self.scratch.layer3_rn(layer_3)
146
+ layer_4_rn = self.scratch.layer4_rn(layer_4)
147
+
148
+ path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
149
+ path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
150
+ path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
151
+ path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
152
+
153
+ out = self.scratch.output_conv1(path_1)
154
+
155
+ # B C H W - segmentaton logits
156
+ out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
157
+
158
+ return out
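
A hedged sketch of the head's expected interface (not part of the commit; it assumes FeatureFusionBlock and _make_scratch in blocks.py follow the Depth Anything conventions, and the sizes are hypothetical):

```python
import torch

B, C, ph, pw = 1, 1024, 16, 16          # ViT-L/14 features for a 224x224 input
head = DPTSegHead(in_channels=C, features=256, out_channels=[256, 512, 1024, 1024], num_classes=5)
head.eval()

# Four (patch_tokens, cls_token) pairs, as returned by get_intermediate_layers(..., return_class_token=True)
feats = [(torch.randn(B, ph * pw, C), torch.randn(B, C)) for _ in range(4)]
with torch.no_grad():
    logits = head(feats, ph, pw)
print(logits.shape)                      # (1, 6, 224, 224): num_classes + 1 for the "ray_stop" output
```
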
src/lari/model/heads.py ADDED
@@ -0,0 +1,104 @@
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import torch.utils
6
+ import torch.utils.checkpoint
7
+ import torch.version
8
+ from typing import *
9
+ import os
10
+ import sys
11
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
12
+ from src.lari.model.blocks import ResidualConvBlock, make_upsampler, make_output_block
13
+ from src.lari.utils.geometry_torch import normalized_view_plane_uv, recover_focal_shift, gaussian_blur_2d
14
+
15
+
16
+ class PointHead(nn.Module):
17
+ def __init__(
18
+ self,
19
+ num_features: int,
20
+ dim_in: int,
21
+ dim_out: int,
22
+ dim_proj: int = 512,
23
+ dim_upsample: List[int] = [256, 128, 128],
24
+ dim_times_res_block_hidden: int = 1,
25
+ num_res_blocks: int = 1,
26
+ res_block_norm: Literal['group_norm', 'layer_norm'] = 'group_norm',
27
+ last_res_blocks: int = 0,
28
+ last_conv_channels: int = 32,
29
+ last_conv_size: int = 1,
30
+ num_output_layer: int = 5
31
+ ):
32
+ super().__init__()
33
+
34
+ self.num_output_layer = num_output_layer
35
+
36
+ self.projects = nn.ModuleList([
37
+ nn.Conv2d(in_channels=dim_in, out_channels=dim_proj, kernel_size=1, stride=1, padding=0,) for _ in range(num_features)
38
+ ])
39
+
40
+ self.upsample_blocks = nn.ModuleList([
41
+ nn.Sequential(
42
+ make_upsampler(in_ch + 2, out_ch),
43
+ *(ResidualConvBlock(out_ch, out_ch, dim_times_res_block_hidden * out_ch, activation="relu", norm=res_block_norm) for _ in range(num_res_blocks))
44
+ ) for in_ch, out_ch in zip([dim_proj] + dim_upsample[:-1], dim_upsample)
45
+ ])
46
+
47
+ # layer iterations
48
+ self.first_layer_block = make_output_block(dim_upsample[-1] + 2, dim_out,
49
+ dim_times_res_block_hidden, last_res_blocks, last_conv_channels, last_conv_size, res_block_norm,)
50
+
51
+ self.remaining_layer_block = nn.ModuleList([make_output_block(dim_upsample[-1] + 2, dim_out,
52
+ dim_times_res_block_hidden, last_res_blocks, last_conv_channels, last_conv_size, res_block_norm,)
53
+ for _ in range(self.num_output_layer - 1)])
54
+
55
+
56
+
57
+ def forward(self, hidden_states: torch.Tensor, image: torch.Tensor):
58
+ img_h, img_w = image.shape[-2:]
59
+ patch_h, patch_w = img_h // 14, img_w // 14
60
+
61
+ # Process the hidden states
62
+ x = torch.stack([
63
+ proj(feat.permute(0, 2, 1).unflatten(2, (patch_h, patch_w)).contiguous())
64
+ for proj, (feat, clstoken) in zip(self.projects, hidden_states)
65
+ ], dim=1).sum(dim=1)
66
+
67
+ # Upsample stage
68
+ # (patch_h, patch_w) -> (patch_h * 2, patch_w * 2) -> (patch_h * 4, patch_w * 4) -> (patch_h * 8, patch_w * 8)
69
+ for i, block in enumerate(self.upsample_blocks):
70
+ # UV coordinates make the network aware of the image aspect ratio
71
+ uv = normalized_view_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
72
+ uv = uv.permute(2, 0, 1).unsqueeze(0).expand(x.shape[0], -1, -1, -1)
73
+ x = torch.cat([x, uv], dim=1)
74
+ for layer in block:
75
+ x = torch.utils.checkpoint.checkpoint(layer, x, use_reentrant=False)
76
+
77
+ # (patch_h * 8, patch_w * 8) -> (img_h, img_w)
78
+ x = F.interpolate(x, (img_h, img_w), mode="bilinear", align_corners=False)
79
+ uv = normalized_view_plane_uv(width=x.shape[-1], height=x.shape[-2], aspect_ratio=img_w / img_h, dtype=x.dtype, device=x.device)
80
+ uv = uv.permute(2, 0, 1).unsqueeze(0).expand(x.shape[0], -1, -1, -1)
81
+ x = torch.cat([x, uv], dim=1)
82
+
83
+
84
+ pts_list = []
85
+ for layer_id in range(self.num_output_layer):
86
+ if layer_id == 0:
87
+ blocks = self.first_layer_block
88
+ else:
89
+ blocks = self.remaining_layer_block[layer_id-1]
90
+
91
+ # for each block
92
+ if isinstance(blocks, nn.ModuleList):
93
+ raise NotImplementedError()
94
+ else:
95
+ res = torch.utils.checkpoint.checkpoint(blocks, x, use_reentrant=False)[:,:3, :,:]
96
+ pts_list.append(res[:, :3, :,:])
97
+
98
+ pts = torch.stack(pts_list, dim=-1)
99
+ seg = pts.new_zeros(pts.shape)[:, :1, ...]
100
+
101
+ # <b 3 h w l>, <b 1 h w l>
102
+ output = [pts, seg]
103
+
104
+ return output
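
For context (not part of the commit): the UV grid concatenated to the features at every scale comes from normalized_view_plane_uv in src/lari/utils/geometry_torch.py; as used above it is expected to return an (H, W, 2) view-plane coordinate grid.

```python
import torch

uv = normalized_view_plane_uv(width=64, height=48, aspect_ratio=4 / 3, dtype=torch.float32, device="cpu")
print(uv.shape)                          # (48, 64, 2), matching the permute(2, 0, 1) call above
```
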
src/lari/model/lari_model.py ADDED
@@ -0,0 +1,177 @@
1
+ from typing import *
2
+ from numbers import Number
3
+ from functools import partial
4
+ from pathlib import Path
5
+ import importlib
6
+ import warnings
7
+ import json
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import torch.utils
13
+ import torch.utils.checkpoint
14
+ import torch.version
15
+ from huggingface_hub import hf_hub_download
16
+ from src.lari.model.utils import wrap_dinov2_attention_with_sdpa, wrap_module_with_gradient_checkpointing, unwrap_module_with_gradient_checkpointing
17
+ from src.lari.model.heads import PointHead
18
+
19
+
20
+ class LaRIModel(nn.Module):
21
+ image_mean: torch.Tensor
22
+ image_std: torch.Tensor
23
+
24
+ def __init__(self,
25
+ encoder: str = 'dinov2_vitl14',
26
+ intermediate_layers: Union[int, List[int]] = 4,
27
+ dim_proj: int = 512,
28
+ dim_upsample: List[int] = [256, 128, 64],
29
+ dim_times_res_block_hidden: int = 2,
30
+ num_res_blocks: int = 2,
31
+ output_mask: bool = True,
32
+ split_head: bool = True,
33
+ remap_output: Literal[False, True, 'linear', 'sinh', 'exp', 'sinh_exp'] = 'exp',
34
+ res_block_norm: Literal['group_norm', 'layer_norm'] = 'group_norm',
35
+ last_res_blocks: int = 0,
36
+ last_conv_channels: int = 32,
37
+ last_conv_size: int = 1,
38
+ use_pretrained: Literal["dinov2", "moge_full", "moge_backbone", None] = None,
39
+ pretrained_path: str = "",
40
+ num_output_layer: str = None,
41
+ head_type = None,
42
+ **deprecated_kwargs
43
+ ):
44
+ super(LaRIModel, self).__init__()
45
+ if deprecated_kwargs:
46
+ warnings.warn(f"The following deprecated/invalid arguments are ignored: {deprecated_kwargs}")
47
+
48
+ self.encoder = encoder
49
+ self.remap_output = remap_output
50
+ self.intermediate_layers = intermediate_layers
51
+ self.head_type = head_type
52
+ self.output_mask = output_mask
53
+ self.split_head = split_head
54
+ self.use_pretrained = use_pretrained
55
+ self.pretrained_path = pretrained_path
56
+ self.num_output_layer = num_output_layer
57
+
58
+ hub_loader = getattr(importlib.import_module(".dinov2.hub.backbones", __package__), encoder)
59
+ # hub_loader = getattr(importlib.import_module("dinov2.hub.backbones", __package__), encoder)
60
+
61
+ self.backbone = hub_loader(pretrained=True if self.use_pretrained == "dinov2" else False)
62
+ dim_feature = self.backbone.blocks[0].attn.qkv.in_features
63
+
64
+ if self.head_type == "point":
65
+ self.head = PointHead(
66
+ num_features=intermediate_layers if isinstance(intermediate_layers, int) else len(intermediate_layers),
67
+ dim_in=dim_feature,
68
+ dim_out=3,
69
+ dim_proj=dim_proj,
70
+ dim_upsample=dim_upsample,
71
+ dim_times_res_block_hidden=dim_times_res_block_hidden,
72
+ num_res_blocks=num_res_blocks,
73
+ res_block_norm=res_block_norm,
74
+ last_res_blocks=last_res_blocks,
75
+ last_conv_channels=last_conv_channels,
76
+ last_conv_size=last_conv_size,
77
+ num_output_layer = num_output_layer
78
+ )
79
+ else:
80
+ raise NotImplementedError()
81
+
82
+
83
+ if torch.__version__ >= '2.0':
84
+ self.enable_pytorch_native_sdpa()
85
+
86
+ self._load_pretrained()
87
+
88
+
89
+ def _load_pretrained(self):
90
+ '''
91
+ Load pre-trained weights
92
+ '''
93
+ if self.use_pretrained == "dinov2" or self.use_pretrained is None: return
94
+
95
+ if self.use_pretrained == "moge_full" and self.pretrained_path != "":
96
+ checkpoint = torch.load(self.pretrained_path, map_location='cpu', weights_only=True)
97
+ if self.head_type == "point":
98
+ key_transition_map = {"output_block": "first_layer_block"}
99
+ model_state_dict = {}
100
+
101
+ # change the key name of the dict
102
+ for key, val in checkpoint['model'].items():
103
+ for trans_src, trans_target in key_transition_map.items():
104
+ if trans_src in key:
105
+ model_state_dict[key.replace(trans_src, trans_target)] = val
106
+ else:
107
+ model_state_dict[key] = val
108
+
109
+ self.load_state_dict(model_state_dict, strict=False)
110
+ del model_state_dict
111
+
112
+
113
+ else:
114
+ return
115
+
116
+
117
+ @staticmethod
118
+ def cache_pretrained_backbone(encoder: str, pretrained: bool):
119
+ _ = torch.hub.load('facebookresearch/dinov2', encoder, pretrained=pretrained)
120
+
121
+ def load_pretrained_backbone(self):
122
+ "Load the backbone with pretrained dinov2 weights from torch hub"
123
+ state_dict = torch.hub.load('facebookresearch/dinov2', self.encoder, pretrained=True).state_dict()
124
+ self.backbone.load_state_dict(state_dict)
125
+
126
+ def enable_backbone_gradient_checkpointing(self):
127
+ for i in range(len(self.backbone.blocks)):
128
+ self.backbone.blocks[i] = wrap_module_with_gradient_checkpointing(self.backbone.blocks[i])
129
+
130
+ def enable_pytorch_native_sdpa(self):
131
+ for i in range(len(self.backbone.blocks)):
132
+ self.backbone.blocks[i].attn = wrap_dinov2_attention_with_sdpa(self.backbone.blocks[i].attn)
133
+
134
+ def forward(self, image: torch.Tensor, mixed_precision: bool = False) -> Dict[str, torch.Tensor]:
135
+ raw_img_h, raw_img_w = image.shape[-2:]
136
+ patch_h, patch_w = raw_img_h // 14, raw_img_w // 14
137
+
138
+ # Apply image transformation for DINOv2
139
+ image_14 = F.interpolate(image, (patch_h * 14, patch_w * 14), mode="bilinear", align_corners=False, antialias=True)
140
+
141
+ # Get intermediate layers from the backbone
142
+ with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=mixed_precision):
143
+ features = self.backbone.get_intermediate_layers(image_14, self.intermediate_layers, return_class_token=True)
144
+
145
+ # Predict points and mask (mask scores)
146
+ points, mask = self.head(features, image)
147
+
148
+ is_output_prob = False
149
+ if mask.ndim == 5:
150
+ # <b, h, w, layer, 3>, <b, h, w, layer, 1>
151
+ points, mask = points.permute(0, 2, 3, 4, 1), mask.permute(0,2,3,4,1)
152
+ elif mask.ndim == 4: # <b, h, w, layer, 3>, <b, layer, h, w>
153
+ points = points.permute(0, 2, 3, 4, 1)
154
+ is_output_prob = True
155
+
156
+ if self.remap_output == 'linear' or self.remap_output == False:
157
+ pass
158
+ elif self.remap_output =='sinh' or self.remap_output == True:
159
+ points = torch.sinh(points)
160
+ elif self.remap_output == 'exp':
161
+ xy, z = points.split([2, 1], dim=-1)
162
+ z = torch.exp(z)
163
+ points = torch.cat([xy * z, z], dim=-1)
164
+ elif self.remap_output =='sinh_exp':
165
+ xy, z = points.split([2, 1], dim=-1)
166
+ points = torch.cat([torch.sinh(xy), torch.exp(z)], dim=-1)
167
+ else:
168
+ raise ValueError(f"Invalid remap output type: {self.remap_output}")
169
+
170
+ return_dict = {'pts3d': points}
171
+
172
+ if not is_output_prob:
173
+ return_dict['mask'] = mask
174
+ else:
175
+ return_dict["seg_prob"] = mask
176
+
177
+ return return_dict
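For reference, the `remap_output` branches above can be exercised in isolation; a minimal sketch with a dummy point tensor (shapes and values are illustrative only, not tied to the trained model):

```python
import torch

# Dummy layered point map: (batch, H, W, layers, xyz), matching the permuted output above.
points = torch.randn(1, 4, 4, 2, 3)

def remap(points: torch.Tensor, mode) -> torch.Tensor:
    if mode in ('linear', False):
        return points
    if mode in ('sinh', True):
        return torch.sinh(points)
    if mode == 'exp':
        xy, z = points.split([2, 1], dim=-1)
        z = torch.exp(z)
        return torch.cat([xy * z, z], dim=-1)
    if mode == 'sinh_exp':
        xy, z = points.split([2, 1], dim=-1)
        return torch.cat([torch.sinh(xy), torch.exp(z)], dim=-1)
    raise ValueError(f"Invalid remap output type: {mode}")

for mode in ('linear', 'sinh', 'exp', 'sinh_exp'):
    # Shape (1, 4, 4, 2, 3) is preserved; the 'exp' variants force the z channel to be positive.
    print(mode, remap(points, mode).shape)
```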
src/lari/model/utils.py ADDED
@@ -0,0 +1,38 @@
1
+ from typing import *
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+ def wrap_module_with_gradient_checkpointing(module: nn.Module):
8
+ from torch.utils.checkpoint import checkpoint
9
+ class _CheckpointingWrapper(module.__class__):
10
+ _restore_cls = module.__class__
11
+ def forward(self, *args, **kwargs):
12
+ return checkpoint(super().forward, *args, use_reentrant=False, **kwargs)
13
+
14
+ module.__class__ = _CheckpointingWrapper
15
+ return module
16
+
17
+
18
+ def unwrap_module_with_gradient_checkpointing(module: nn.Module):
19
+ module.__class__ = module.__class__._restore_cls
20
+
21
+
22
+ def wrap_dinov2_attention_with_sdpa(module: nn.Module):
23
+ assert torch.__version__ >= '2.0', "SDPA requires PyTorch 2.0 or later"
24
+ class _AttentionWrapper(module.__class__):
25
+ def forward(self, x: torch.Tensor, attn_bias=None) -> torch.Tensor:
26
+ B, N, C = x.shape
27
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) # (3, B, H, N, C // H)
28
+
29
+ q, k, v = torch.unbind(qkv, 0) # (B, H, N, C // H)
30
+
31
+ x = F.scaled_dot_product_attention(q, k, v, attn_bias)
32
+ x = x.permute(0, 2, 1, 3).reshape(B, N, C)
33
+
34
+ x = self.proj(x)
35
+ x = self.proj_drop(x)
36
+ return x
37
+ module.__class__ = _AttentionWrapper
38
+ return module
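A small usage sketch for the two wrappers above. The import path and the DINOv2 hub entry point are assumptions for illustration (they depend on how the repository root is placed on `sys.path`):

```python
import torch
# Hypothetical import path, assuming the repository root is on sys.path.
from src.lari.model.utils import (
    wrap_dinov2_attention_with_sdpa,
    wrap_module_with_gradient_checkpointing,
)

backbone = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14', pretrained=False)

for i in range(len(backbone.blocks)):
    # Swap the attention forward for the PyTorch-native SDPA path (requires torch >= 2.0) ...
    backbone.blocks[i].attn = wrap_dinov2_attention_with_sdpa(backbone.blocks[i].attn)
    # ... and checkpoint each block to trade recomputation for activation memory during training.
    backbone.blocks[i] = wrap_module_with_gradient_checkpointing(backbone.blocks[i])
```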
src/lari/utils/__init__.py ADDED
File without changes
src/lari/utils/geometry_numpy.py ADDED
@@ -0,0 +1,187 @@
1
+ from typing import *
2
+ from functools import partial
3
+ import math
4
+
5
+ import numpy as np
6
+ import utils3d
7
+
8
+ def weighted_mean_numpy(x: np.ndarray, w: np.ndarray = None, axis: Union[int, Tuple[int,...]] = None, keepdims: bool = False, eps: float = 1e-7) -> np.ndarray:
9
+ if w is None:
10
+ return np.mean(x, axis=axis)
11
+ else:
12
+ w = w.astype(x.dtype)
13
+ return (x * w).mean(axis=axis) / np.clip(w.mean(axis=axis), eps, None)
14
+
15
+
16
+ def harmonic_mean_numpy(x: np.ndarray, w: np.ndarray = None, axis: Union[int, Tuple[int,...]] = None, keepdims: bool = False, eps: float = 1e-7) -> np.ndarray:
17
+ if w is None:
18
+ return 1 / (1 / np.clip(x, eps, None)).mean(axis=axis)
19
+ else:
20
+ w = w.astype(x.dtype)
21
+ return 1 / (weighted_mean_numpy(1 / (x + eps), w, axis=axis, keepdims=keepdims, eps=eps) + eps)
22
+
23
+
24
+ def normalized_view_plane_uv_numpy(width: int, height: int, aspect_ratio: float = None, dtype: np.dtype = np.float32) -> np.ndarray:
25
+ "UV with left-top corner as (-width / diagonal, -height / diagonal) and right-bottom corner as (width / diagonal, height / diagonal)"
26
+ if aspect_ratio is None:
27
+ aspect_ratio = width / height
28
+
29
+ span_x = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5
30
+ span_y = 1 / (1 + aspect_ratio ** 2) ** 0.5
31
+
32
+ u = np.linspace(-span_x * (width - 1) / width, span_x * (width - 1) / width, width, dtype=dtype)
33
+ v = np.linspace(-span_y * (height - 1) / height, span_y * (height - 1) / height, height, dtype=dtype)
34
+ u, v = np.meshgrid(u, v, indexing='xy')
35
+ uv = np.stack([u, v], axis=-1)
36
+ return uv
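As a quick numeric check of the convention stated in the docstring (illustrative only, assuming the function above is in scope): for a 640x480 image the diagonal is 800, so the half-spans are 640/800 = 0.8 and 480/800 = 0.6.

```python
import numpy as np

uv = normalized_view_plane_uv_numpy(width=640, height=480)
print(uv.shape)      # (480, 640, 2)
print(uv[0, 0])      # approx (-0.8, -0.6); slightly inside the corner because values sit at pixel centers
print(uv[-1, -1])    # approx ( 0.8,  0.6)
```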
37
+
38
+
39
+ def focal_to_fov_numpy(focal: np.ndarray):
40
+ return 2 * np.arctan(0.5 / focal)
41
+
42
+
43
+ def fov_to_focal_numpy(fov: np.ndarray):
44
+ return 0.5 / np.tan(fov / 2)
45
+
46
+
47
+ def intrinsics_to_fov_numpy(intrinsics: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
48
+ fov_x = focal_to_fov_numpy(intrinsics[..., 0, 0])
49
+ fov_y = focal_to_fov_numpy(intrinsics[..., 1, 1])
50
+ return fov_x, fov_y
51
+
52
+
53
+ def point_map_to_depth_legacy_numpy(points: np.ndarray):
54
+ height, width = points.shape[-3:-1]
55
+ diagonal = (height ** 2 + width ** 2) ** 0.5
56
+ uv = normalized_view_plane_uv_numpy(width, height, dtype=points.dtype) # (H, W, 2)
57
+ _, uv = np.broadcast_arrays(points[..., :2], uv)
58
+
59
+ # Solve least squares problem
60
+ b = (uv * points[..., 2:]).reshape(*points.shape[:-3], -1) # (..., H * W * 2)
61
+ A = np.stack([points[..., :2], -uv], axis=-1).reshape(*points.shape[:-3], -1, 2) # (..., H * W * 2, 2)
62
+
63
+ M = A.swapaxes(-2, -1) @ A
64
+ solution = (np.linalg.inv(M + 1e-6 * np.eye(2)) @ (A.swapaxes(-2, -1) @ b[..., None])).squeeze(-1)
65
+ focal, shift = solution
66
+
67
+ depth = points[..., 2] + shift[..., None, None]
68
+ fov_x = np.arctan(width / diagonal / focal) * 2
69
+ fov_y = np.arctan(height / diagonal / focal) * 2
70
+ return depth, fov_x, fov_y, shift
71
+
72
+
73
+ def solve_optimal_focal_shift(uv: np.ndarray, xyz: np.ndarray):
74
+ "Solve `min |focal * xy / (z + shift) - uv|` with respect to shift and focal"
75
+ from scipy.optimize import least_squares
76
+ uv, xy, z = uv.reshape(-1, 2), xyz[..., :2].reshape(-1, 2), xyz[..., 2].reshape(-1)
77
+
78
+ def fn(uv: np.ndarray, xy: np.ndarray, z: np.ndarray, shift: np.ndarray):
79
+ xy_proj = xy / (z + shift)[: , None]
80
+ f = (xy_proj * uv).sum() / np.square(xy_proj).sum()
81
+ err = (f * xy_proj - uv).ravel()
82
+ return err
83
+
84
+ solution = least_squares(partial(fn, uv, xy, z), x0=0, ftol=1e-3, method='lm')
85
+ optim_shift = solution['x'].squeeze().astype(np.float32)
86
+
87
+ xy_proj = xy / (z + optim_shift)[: , None]
88
+ optim_focal = (xy_proj * uv).sum() / np.square(xy_proj).sum()
89
+
90
+ return optim_shift, optim_focal
91
+
92
+
93
+ def solve_optimal_shift(uv: np.ndarray, xyz: np.ndarray, focal: float):
94
+ "Solve `min |focal * xy / (z + shift) - uv|` with respect to shift"
95
+ from scipy.optimize import least_squares
96
+ uv, xy, z = uv.reshape(-1, 2), xyz[..., :2].reshape(-1, 2), xyz[..., 2].reshape(-1)
97
+
98
+ def fn(uv: np.ndarray, xy: np.ndarray, z: np.ndarray, shift: np.ndarray):
99
+ xy_proj = xy/ (z + shift)[: , None]
100
+ err = (focal * xy_proj - uv).ravel()
101
+ return err
102
+
103
+ solution = least_squares(partial(fn, uv, xy, z), x0=0, ftol=1e-3, method='lm')
104
+ optim_shift = solution['x'].squeeze().astype(np.float32)
105
+
106
+ return optim_shift
107
+
108
+
109
+ def recover_focal_shift_numpy(points: np.ndarray, mask: np.ndarray = None, focal: float = None, downsample_size: Tuple[int, int] = (64, 64)):
110
+ import cv2
111
+ assert points.shape[-1] == 3, "Points should be of shape (H, W, 3)"
112
+
113
+ height, width = points.shape[-3], points.shape[-2]
114
+ diagonal = (height ** 2 + width ** 2) ** 0.5
115
+
116
+ uv = normalized_view_plane_uv_numpy(width=width, height=height)
117
+
118
+ if mask is None:
119
+ points_lr = cv2.resize(points, downsample_size, interpolation=cv2.INTER_LINEAR).reshape(-1, 3)
120
+ uv_lr = cv2.resize(uv, downsample_size, interpolation=cv2.INTER_LINEAR).reshape(-1, 2)
121
+ else:
122
+ index, mask_lr = mask_aware_nearest_resize_numpy(mask, *downsample_size)
123
+ points_lr, uv_lr = points[index][mask_lr], uv[index][mask_lr]
124
+
125
+ if points_lr.size == 0:
126
+ return 0.0, 0.0  # no valid points: return zero focal and shift, consistent with the (focal, shift) return below
127
+
128
+ if focal is None:
129
+ focal, shift = solve_optimal_focal_shift(uv_lr, points_lr)
130
+ else:
131
+ shift = solve_optimal_shift(uv_lr, points_lr, focal)
132
+
133
+ return focal, shift
134
+
135
+
136
+ def mask_aware_nearest_resize_numpy(mask: np.ndarray, target_width: int, target_height: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
137
+ """
138
+ Resize 2D map by nearest interpolation. Return the nearest neighbor index and mask of the resized map.
139
+
140
+ ### Parameters
141
+ - `mask`: Input 2D mask of shape (..., H, W)
142
+ - `target_width`: target width of the resized map
143
+ - `target_height`: target height of the resized map
144
+
145
+ ### Returns
146
+ - `nearest_idx`: Tuple of per-dimension index arrays, each of shape (..., target_height, target_width), giving the nearest valid source pixel for every target pixel; it can be used directly to index the original map, e.g. `points[nearest_idx]`.
147
+ - `target_mask`: Mask of the resized map of shape (..., target_height, target_width)
148
+ """
149
+ height, width = mask.shape[-2:]
150
+ filter_h_f, filter_w_f = max(1, height / target_height), max(1, width / target_width)
151
+ filter_h_i, filter_w_i = math.ceil(filter_h_f), math.ceil(filter_w_f)
152
+ filter_size = filter_h_i * filter_w_i
153
+ padding_h, padding_w = round(filter_h_f / 2), round(filter_w_f / 2)
154
+
155
+ # Window the original mask and uv
156
+ uv = utils3d.numpy.image_pixel_center(width=width, height=height, dtype=np.float32)
157
+ indices = np.arange(height * width, dtype=np.int32).reshape(height, width)
158
+ padded_uv = np.full((height + 2 * padding_h, width + 2 * padding_w, 2), 0, dtype=np.float32)
159
+ padded_uv[padding_h:padding_h + height, padding_w:padding_w + width] = uv
160
+ padded_mask = np.full((*mask.shape[:-2], height + 2 * padding_h, width + 2 * padding_w), False, dtype=bool)
161
+ padded_mask[..., padding_h:padding_h + height, padding_w:padding_w + width] = mask
162
+ padded_indices = np.full((height + 2 * padding_h, width + 2 * padding_w), 0, dtype=np.int32)
163
+ padded_indices[padding_h:padding_h + height, padding_w:padding_w + width] = indices
164
+ windowed_uv = utils3d.numpy.sliding_window_2d(padded_uv, (filter_h_i, filter_w_i), 1, axis=(0, 1))
165
+ windowed_mask = utils3d.numpy.sliding_window_2d(padded_mask, (filter_h_i, filter_w_i), 1, axis=(-2, -1))
166
+ windowed_indices = utils3d.numpy.sliding_window_2d(padded_indices, (filter_h_i, filter_w_i), 1, axis=(0, 1))
167
+
168
+ # Gather each target pixel's local window
169
+ target_uv = utils3d.numpy.image_uv(width=target_width, height=target_height, dtype=np.float32) * np.array([width, height], dtype=np.float32)
170
+ target_corner = target_uv - np.array((filter_w_f / 2, filter_h_f / 2), dtype=np.float32)
171
+ target_corner = np.round(target_corner - 0.5).astype(np.int32) + np.array((padding_w, padding_h), dtype=np.int32)
172
+
173
+ target_window_uv = windowed_uv[target_corner[..., 1], target_corner[..., 0], :, :, :].reshape(target_height, target_width, 2, filter_size) # (target_height, tgt_width, 2, filter_size)
174
+ target_window_mask = windowed_mask[..., target_corner[..., 1], target_corner[..., 0], :, :].reshape(*mask.shape[:-2], target_height, target_width, filter_size) # (..., target_height, tgt_width, filter_size)
175
+ target_window_indices = windowed_indices[target_corner[..., 1], target_corner[..., 0], :, :].reshape(target_height, target_width, filter_size) # (target_height, tgt_width, filter_size)
176
+
177
+ # Compute nearest neighbor in the local window for each pixel
178
+ dist = np.square(target_window_uv - target_uv[..., None])
179
+ dist = dist[..., 0, :] + dist[..., 1, :]
180
+ dist = np.where(target_window_mask, dist, np.inf) # (..., target_height, tgt_width, filter_size)
181
+ nearest_in_window = np.argmin(dist, axis=-1, keepdims=True) # (..., target_height, tgt_width, 1)
182
+ nearest_idx = np.take_along_axis(target_window_indices, nearest_in_window, axis=-1).squeeze(-1) # (..., target_height, tgt_width)
183
+ nearest_i, nearest_j = nearest_idx // width, nearest_idx % width
184
+ target_mask = np.any(target_window_mask, axis=-1)
185
+ batch_indices = [np.arange(n).reshape([1] * i + [n] + [1] * (mask.ndim - i - 1)) for i, n in enumerate(mask.shape[:-2])]
186
+
187
+ return (*batch_indices, nearest_i, nearest_j), target_mask
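A short usage sketch for the mask-aware resize above (assuming the function is in scope and the vendored `utils3d` package is importable): the returned index tuple can be used directly to gather values from the full-resolution map, exactly as `recover_focal_shift_numpy` does.

```python
import numpy as np

H, W = 128, 160
mask = np.random.rand(H, W) > 0.3          # per-pixel validity
points = np.random.rand(H, W, 3)           # any per-pixel map to downsample

index, mask_lr = mask_aware_nearest_resize_numpy(mask, target_width=64, target_height=64)
points_lr = points[index]                  # (64, 64, 3); meaningful only where mask_lr is True
valid_points = points_lr[mask_lr]          # (K, 3) nearest valid samples
print(points_lr.shape, valid_points.shape)
```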
src/lari/utils/geometry_torch.py ADDED
@@ -0,0 +1,221 @@
1
+ from typing import *
2
+ import math
3
+ from collections import namedtuple
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import torch.types
10
+
11
+ import os
12
+ import sys
13
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
14
+ import utils3d
15
+ from .geometry_numpy import solve_optimal_focal_shift, solve_optimal_shift
16
+
17
+
18
+ def weighted_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
19
+ if w is None:
20
+ return x.mean(dim=dim, keepdim=keepdim)
21
+ else:
22
+ w = w.to(x.dtype)
23
+ return (x * w).mean(dim=dim, keepdim=keepdim) / w.mean(dim=dim, keepdim=keepdim).add(eps)
24
+
25
+
26
+ def harmonic_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
27
+ if w is None:
28
+ return x.add(eps).reciprocal().mean(dim=dim, keepdim=keepdim).reciprocal()
29
+ else:
30
+ w = w.to(x.dtype)
31
+ return weighted_mean(x.add(eps).reciprocal(), w, dim=dim, keepdim=keepdim, eps=eps).add(eps).reciprocal()
32
+
33
+
34
+ def geometric_mean(x: torch.Tensor, w: torch.Tensor = None, dim: Union[int, torch.Size] = None, keepdim: bool = False, eps: float = 1e-7) -> torch.Tensor:
35
+ if w is None:
36
+ return x.add(eps).log().mean(dim=dim).exp()
37
+ else:
38
+ w = w.to(x.dtype)
39
+ return weighted_mean(x.add(eps).log(), w, dim=dim, keepdim=keepdim, eps=eps).exp()
40
+
41
+
42
+ def normalized_view_plane_uv(width: int, height: int, aspect_ratio: float = None, dtype: torch.dtype = None, device: torch.device = None) -> torch.Tensor:
43
+ "UV with left-top corner as (-width / diagonal, -height / diagonal) and right-bottom corner as (width / diagonal, height / diagonal)"
44
+ if aspect_ratio is None:
45
+ aspect_ratio = width / height
46
+
47
+ span_x = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5
48
+ span_y = 1 / (1 + aspect_ratio ** 2) ** 0.5
49
+
50
+ u = torch.linspace(-span_x * (width - 1) / width, span_x * (width - 1) / width, width, dtype=dtype, device=device)
51
+ v = torch.linspace(-span_y * (height - 1) / height, span_y * (height - 1) / height, height, dtype=dtype, device=device)
52
+ u, v = torch.meshgrid(u, v, indexing='xy')
53
+ uv = torch.stack([u, v], dim=-1)
54
+ return uv
55
+
56
+
57
+ def gaussian_blur_2d(input: torch.Tensor, kernel_size: int, sigma: float) -> torch.Tensor:
58
+ kernel = torch.exp(-(torch.arange(-kernel_size // 2 + 1, kernel_size // 2 + 1, dtype=input.dtype, device=input.device) ** 2) / (2 * sigma ** 2))
59
+ kernel = kernel / kernel.sum()
60
+ kernel = (kernel[:, None] * kernel[None, :]).reshape(1, 1, kernel_size, kernel_size)
61
+ input = F.pad(input, (kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2), mode='replicate')
62
+ input = F.conv2d(input, kernel.expand(input.shape[1], -1, -1, -1), groups=input.shape[1])  # depthwise: the same kernel applied per channel
63
+ return input
64
+
65
+
66
+ def focal_to_fov(focal: torch.Tensor):
67
+ return 2 * torch.atan(0.5 / focal)
68
+
69
+
70
+ def fov_to_focal(fov: torch.Tensor):
71
+ return 0.5 / torch.tan(fov / 2)
72
+
73
+
74
+ def intrinsics_to_fov(intrinsics: torch.Tensor):
75
+ """
76
+ Returns field of view in radians from normalized intrinsics matrix.
77
+ ### Parameters:
78
+ - intrinsics: torch.Tensor of shape (..., 3, 3)
79
+
80
+ ### Returns:
81
+ - fov_x: torch.Tensor of shape (...)
82
+ - fov_y: torch.Tensor of shape (...)
83
+ """
84
+ focal_x = intrinsics[..., 0, 0]
85
+ focal_y = intrinsics[..., 1, 1]
86
+ return 2 * torch.atan(0.5 / focal_x), 2 * torch.atan(0.5 / focal_y)
87
+
88
+
89
+ def point_map_to_depth_legacy(points: torch.Tensor):
90
+ height, width = points.shape[-3:-1]
91
+ diagonal = (height ** 2 + width ** 2) ** 0.5
92
+ uv = normalized_view_plane_uv(width, height, dtype=points.dtype, device=points.device) # (H, W, 2)
93
+
94
+ # Solve least squares problem
95
+ b = (uv * points[..., 2:]).flatten(-3, -1) # (..., H * W * 2)
96
+ A = torch.stack([points[..., :2], -uv.expand_as(points[..., :2])], dim=-1).flatten(-4, -2) # (..., H * W * 2, 2)
97
+
98
+ M = A.transpose(-2, -1) @ A
99
+ solution = (torch.inverse(M + 1e-6 * torch.eye(2).to(A)) @ (A.transpose(-2, -1) @ b[..., None])).squeeze(-1)
100
+ focal, shift = solution.unbind(-1)
101
+
102
+ depth = points[..., 2] + shift[..., None, None]
103
+ fov_x = torch.atan(width / diagonal / focal) * 2
104
+ fov_y = torch.atan(height / diagonal / focal) * 2
105
+ return depth, fov_x, fov_y, shift
106
+
107
+
108
+ def view_plane_uv_to_focal(uv: torch.Tensor):
109
+ normed_uv = normalized_view_plane_uv(width=uv.shape[-2], height=uv.shape[-3], device=uv.device, dtype=uv.dtype)
110
+ focal = (uv * normed_uv).sum() / uv.square().sum().add(1e-12)
111
+ return focal
112
+
113
+
114
+ def recover_focal_shift(points: torch.Tensor, mask: torch.Tensor = None, focal: torch.Tensor = None, downsample_size: Tuple[int, int] = (64, 64)):
115
+ """
116
+ Recover the depth map and FoV from a point map with unknown z shift and focal.
117
+
118
+ Note that it assumes:
119
+ - the optical center is at the center of the map
120
+ - the map is undistorted
121
+ - the map is isometric in the x and y directions
122
+
123
+ ### Parameters:
124
+ - `points: torch.Tensor` of shape (..., H, W, 3)
125
+ - `mask: torch.Tensor` of shape (..., H, W). Optional.
126
+ - `focal: torch.Tensor` of shape (...). Optional.
127
+ - `downsample_size: Tuple[int, int]` in (height, width), the size of the downsampled map. Downsampling produces approximate solution and is efficient for large maps.
128
+
129
+ ### Returns:
130
+ - `focal`: torch.Tensor of shape (...) the estimated focal length, relative to the half diagonal of the map
131
+ - `shift`: torch.Tensor of shape (...) Z-axis shift to translate the point map to camera space
132
+ """
133
+ shape = points.shape
134
+ height, width = points.shape[-3], points.shape[-2]
135
+ diagonal = (height ** 2 + width ** 2) ** 0.5
136
+
137
+ points = points.reshape(-1, *shape[-3:])
138
+ mask = None if mask is None else mask.reshape(-1, *shape[-3:-1])
139
+ focal = focal.reshape(-1) if focal is not None else None
140
+ uv = normalized_view_plane_uv(width, height, dtype=points.dtype, device=points.device) # (H, W, 2)
141
+
142
+ points_lr = F.interpolate(points.permute(0, 3, 1, 2), downsample_size, mode='nearest').permute(0, 2, 3, 1)
143
+ uv_lr = F.interpolate(uv.unsqueeze(0).permute(0, 3, 1, 2), downsample_size, mode='nearest').squeeze(0).permute(1, 2, 0)
144
+ mask_lr = None if mask is None else F.interpolate(mask.to(torch.float32).unsqueeze(1), downsample_size, mode='nearest').squeeze(1) > 0
145
+
146
+ uv_lr_np = uv_lr.cpu().numpy()
147
+ points_lr_np = points_lr.detach().cpu().numpy()
148
+ focal_np = focal.cpu().numpy() if focal is not None else None
149
+ mask_lr_np = None if mask is None else mask_lr.cpu().numpy()
150
+ optim_shift, optim_focal = [], []
151
+ for i in range(points.shape[0]):
152
+ points_lr_i_np = points_lr_np[i] if mask is None else points_lr_np[i][mask_lr_np[i]]
153
+ uv_lr_i_np = uv_lr_np if mask is None else uv_lr_np[mask_lr_np[i]]
154
+ if focal is None:
155
+ optim_shift_i, optim_focal_i = solve_optimal_focal_shift(uv_lr_i_np, points_lr_i_np)
156
+ optim_focal.append(float(optim_focal_i))
157
+ else:
158
+ optim_shift_i = solve_optimal_shift(uv_lr_i_np, points_lr_i_np, focal_np[i])
159
+ optim_shift.append(float(optim_shift_i))
160
+ optim_shift = torch.tensor(optim_shift, device=points.device, dtype=points.dtype).reshape(shape[:-3])
161
+
162
+ if focal is None:
163
+ optim_focal = torch.tensor(optim_focal, device=points.device, dtype=points.dtype).reshape(shape[:-3])
164
+ else:
165
+ optim_focal = focal.reshape(shape[:-3])
166
+
167
+ return optim_focal, optim_shift
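A minimal round-trip sketch for `recover_focal_shift` (assuming the functions in this file are in scope; values are illustrative): a synthetic point map is built with a known focal and z-shift, and both are estimated back.

```python
import torch

H, W, true_focal, true_shift = 48, 64, 1.2, 0.5
uv = normalized_view_plane_uv(W, H)                                    # (H, W, 2)
depth = 2.0 + torch.rand(H, W)                                         # arbitrary positive depth
xy = uv * depth[..., None] / true_focal                                # back-project with the known focal
points = torch.cat([xy, depth[..., None] - true_shift], dim=-1)[None]  # (1, H, W, 3), z shifted by -0.5

focal, shift = recover_focal_shift(points)
print(focal.item(), shift.item())  # expected to be close to 1.2 and 0.5
```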
168
+
169
+
170
+ def mask_aware_nearest_resize(mask: torch.BoolTensor, target_width: int, target_height: int) -> Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]:
171
+ """
172
+ Resize 2D map by nearest interpolation. Return the nearest neighbor index and mask of the resized map.
173
+
174
+ ### Parameters
175
+ - `mask`: Input 2D mask of shape (..., H, W)
176
+ - `target_width`: target width of the resized map
177
+ - `target_height`: target height of the resized map
178
+
179
+ ### Returns
180
+ - `nearest_idx`: Nearest neighbor index of the resized map of shape (..., target_height, target_width) for each dimension
181
+ - `target_mask`: Mask of the resized map of shape (..., target_height, target_width)
182
+ """
183
+ height, width = mask.shape[-2:]
184
+ device = mask.device
185
+ filter_h_f, filter_w_f = max(1, height / target_height), max(1, width / target_width)
186
+ filter_h_i, filter_w_i = math.ceil(filter_h_f), math.ceil(filter_w_f)
187
+ filter_size = filter_h_i * filter_w_i
188
+ padding_h, padding_w = round(filter_h_f / 2), round(filter_w_f / 2)
189
+
190
+ # Window the original mask and uv
191
+ uv = utils3d.torch.image_pixel_center(width=width, height=height, dtype=torch.float32, device=device)
192
+ indices = torch.arange(height * width, dtype=torch.long, device=device).reshape(height, width)
193
+ padded_uv = torch.full((height + 2 * padding_h, width + 2 * padding_w, 2), 0, dtype=torch.float32, device=device)
194
+ padded_uv[padding_h:padding_h + height, padding_w:padding_w + width] = uv
195
+ padded_mask = torch.full((*mask.shape[:-2], height + 2 * padding_h, width + 2 * padding_w), False, dtype=torch.bool, device=device)
196
+ padded_mask[..., padding_h:padding_h + height, padding_w:padding_w + width] = mask
197
+ padded_indices = torch.full((height + 2 * padding_h, width + 2 * padding_w), 0, dtype=torch.long, device=device)
198
+ padded_indices[padding_h:padding_h + height, padding_w:padding_w + width] = indices
199
+ windowed_uv = utils3d.torch.sliding_window_2d(padded_uv, (filter_h_i, filter_w_i), 1, dim=(0, 1))
200
+ windowed_mask = utils3d.torch.sliding_window_2d(padded_mask, (filter_h_i, filter_w_i), 1, dim=(-2, -1))
201
+ windowed_indices = utils3d.torch.sliding_window_2d(padded_indices, (filter_h_i, filter_w_i), 1, dim=(0, 1))
202
+
203
+ # Gather each target pixel's local window
204
+ target_uv = utils3d.torch.image_uv(width=target_width, height=target_height, dtype=torch.float32, device=device) * torch.tensor([width, height], dtype=torch.float32, device=device)
205
+ target_corner = target_uv - torch.tensor((filter_w_f / 2, filter_h_f / 2), dtype=torch.float32, device=device)
206
+ target_corner = torch.round(target_corner - 0.5).long() + torch.tensor((padding_w, padding_h), dtype=torch.long, device=device)
207
+
208
+ target_window_uv = windowed_uv[target_corner[..., 1], target_corner[..., 0], :, :, :].reshape(target_height, target_width, 2, filter_size) # (target_height, tgt_width, 2, filter_size)
209
+ target_window_mask = windowed_mask[..., target_corner[..., 1], target_corner[..., 0], :, :].reshape(*mask.shape[:-2], target_height, target_width, filter_size) # (..., target_height, tgt_width, filter_size)
210
+ target_window_indices = windowed_indices[target_corner[..., 1], target_corner[..., 0], :, :].reshape(target_height, target_width, filter_size) # (target_height, tgt_width, filter_size)
211
+ target_window_indices = target_window_indices.expand_as(target_window_mask)
212
+
213
+ # Compute nearest neighbor in the local window for each pixel
214
+ dist = torch.where(target_window_mask, torch.norm(target_window_uv - target_uv[..., None], dim=-2), torch.inf) # (..., target_height, tgt_width, filter_size)
215
+ nearest = torch.argmin(dist, dim=-1, keepdim=True) # (..., target_height, tgt_width, 1)
216
+ nearest_idx = torch.gather(target_window_indices, index=nearest, dim=-1).squeeze(-1) # (..., target_height, tgt_width)
217
+ target_mask = torch.any(target_window_mask, dim=-1)
218
+ nearest_i, nearest_j = nearest_idx // width, nearest_idx % width
219
+ batch_indices = [torch.arange(n, device=device).reshape([1] * i + [n] + [1] * (mask.dim() - i - 1)) for i, n in enumerate(mask.shape[:-2])]
220
+
221
+ return (*batch_indices, nearest_i, nearest_j), target_mask
src/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
+ # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
src/utils/vis.py ADDED
@@ -0,0 +1,105 @@
1
+ # import torchvision.transforms as transforms
2
+ # import torch.nn.functional as F
3
+ # import cv2
4
+ # import os
5
+ # import logging
6
+ # from pathlib import Path
7
+ import numpy as np
8
+ # import os
9
+ import torch
10
+ import matplotlib
11
+ # import cv2
12
+ # import random
13
+ # from PIL import Image
14
+ # import imageio
15
+
16
+ def prob_to_mask(prob):
17
+ """
18
+ Transforms a probability map of stopping points (shape: (n_layer+1, H, W))
19
+ into a binary mask (shape: (H, W, n_layer, 1)) where for each pixel, layers
20
+ with index ≤ stopping index (as given by argmax) are marked valid.
21
+ """
22
+ num_layer_plus1, H, W = prob.shape
23
+ # Get stopping index for each pixel; values are in {0, 1, ..., n_layer}
24
+ stopping_indices = torch.argmax(prob, dim=0) # (H, W)
25
+
26
+ # Create a tensor with layer indices [1, 2, ..., n_layer]
27
+ layer_indices = torch.arange(1, num_layer_plus1, device=prob.device).view(-1, 1, 1)
28
+
29
+ # Compare: a layer is valid if its index is <= the stopping index.
30
+ pred_mask = (layer_indices <= stopping_indices.unsqueeze(0))
31
+
32
+ # Permute and unsqueeze to get shape (H, W, n_layer, 1)
33
+ pred_mask = pred_mask.permute(1, 2, 0).unsqueeze(-1)
34
+ return pred_mask
35
+
36
+
37
+
38
+
39
+ def colorize(value, vmin=None, vmax=None, cmap='rainbow', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None):
40
+ """Converts a depth map to a color image.
41
+
42
+ Args:
43
+ value (torch.Tensor, numpy.ndarray): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed
45
+ vmin (float, optional): vmin-valued entries are mapped to the start color of cmap. If None, the 2nd percentile of the valid values is used. Defaults to None.
46
+ vmax (float, optional): vmax-valued entries are mapped to the end color of cmap. If None, the 85th percentile of the valid values is used. Defaults to None.
47
+ cmap (str, optional): matplotlib colormap to use. Defaults to 'rainbow'.
47
+ invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99.
48
+ invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None.
49
+ background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255).
50
+ gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False.
51
+ value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None.
52
+
53
+ Returns:
54
+ numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4)
55
+ """
56
+ if isinstance(value, torch.Tensor):
57
+ value = value.detach().cpu().numpy()
58
+
59
+ value = value.squeeze()
60
+ if invalid_mask is None:
61
+ invalid_mask = value == invalid_val
62
+ mask = np.logical_not(invalid_mask)
63
+
64
+ # normalize
65
+ vmin = np.percentile(value[mask],2) if vmin is None else vmin
66
+ vmax = np.percentile(value[mask],85) if vmax is None else vmax
67
+ if vmin != vmax:
68
+ value = (value - vmin) / (vmax - vmin) # vmin..vmax
69
+ else:
70
+ # Avoid 0-division
71
+ value = value * 0.
72
+
73
+ value[invalid_mask] = np.nan
74
+ cmapper = matplotlib.cm.get_cmap(cmap)
75
+ if value_transform:
76
+ value = value_transform(value)
77
+ # value = value / value.max()
78
+ value = cmapper(value, bytes=True) # (nxmx4)
79
+
80
+ # img = value[:, :, :]
81
+ img = value[...]
82
+ img[invalid_mask] = background_color
83
+
84
+ if gamma_corrected:
85
+ # gamma correction
86
+ img = img / 255
87
+ img = np.power(img, 2.2)
88
+ img = img * 255
89
+ img = img.astype(np.uint8)
90
+ return img
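A short usage sketch for `colorize` (parameters are illustrative):

```python
import numpy as np

depth = np.random.rand(240, 320).astype(np.float32) * 5.0
depth[:20, :20] = -99                       # mark a corner as invalid

colored = colorize(depth, vmin=0.0, vmax=5.0, cmap='rainbow', invalid_val=-99)
print(colored.shape, colored.dtype)         # (240, 320, 4) uint8; invalid pixels get the grey background color
```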
91
+
92
+
93
+
94
+ def denormalize(x):
95
+ """Reverses the imagenet normalization applied to the input.
96
+
97
+ Args:
98
+ x (torch.Tensor - shape(N,3,H,W)): input tensor
99
+
100
+ Returns:
101
+ torch.Tensor - shape(N,3,H,W): Denormalized input
102
+ """
103
+ mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
104
+ std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
105
+ return x * std + mean
src/utils3d/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # utils3d
2
+
3
+ This is a collection of utility functions for 3D computer vision tasks copied from https://github.com/EasternJournalist/utils3d.
src/utils3d/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ """
2
+ A package for common utility functions in 3D computer graphics and vision, providing NumPy utilities in `utils3d.numpy`, PyTorch utilities in `utils3d.torch`, and IO utilities in `utils3d.io`.
3
+ """
4
+ import importlib
5
+ from typing import TYPE_CHECKING
6
+
7
+ try:
8
+ from ._unified import *
9
+ except ImportError:
10
+ pass
11
+
12
+ __all__ = ['numpy', 'torch', 'io']
13
+
14
+ def __getattr__(name: str):
15
+ return globals().get(name, importlib.import_module(f'.{name}', __package__))
16
+
17
+ if TYPE_CHECKING:
18
+ from . import torch
19
+ from . import numpy
20
+ from . import io
src/utils3d/_helpers.py ADDED
@@ -0,0 +1,35 @@
1
+ from functools import wraps
2
+ import warnings
3
+
4
+
5
+ def suppress_traceback(fn):
6
+ @wraps(fn)
7
+ def wrapper(*args, **kwargs):
8
+ try:
9
+ return fn(*args, **kwargs)
10
+ except Exception as e:
11
+ e.__traceback__ = e.__traceback__.tb_next.tb_next
12
+ raise
13
+ return wrapper
14
+
15
+
16
+ class no_warnings:
17
+ def __init__(self, action: str = 'ignore', **kwargs):
18
+ self.action = action
19
+ self.filter_kwargs = kwargs
20
+
21
+ def __call__(self, fn):
22
+ @wraps(fn)
23
+ def wrapper(*args, **kwargs):
24
+ with warnings.catch_warnings():
25
+ warnings.simplefilter(self.action, **self.filter_kwargs)
26
+ return fn(*args, **kwargs)
27
+ return wrapper
28
+
29
+ def __enter__(self):
30
+ self.warnings_manager = warnings.catch_warnings()
31
+ self.warnings_manager.__enter__()
32
+ warnings.simplefilter(self.action, **self.filter_kwargs)
33
+
34
+ def __exit__(self, exc_type, exc_val, exc_tb):
35
+ self.warnings_manager.__exit__(exc_type, exc_val, exc_tb)
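`no_warnings` can be used either as a decorator or as a context manager; a small sketch (assuming the class above is in scope):

```python
import warnings

@no_warnings(action='ignore')
def noisy() -> int:
    warnings.warn("silenced by the decorator")
    return 42

print(noisy())  # 42, without the warning being emitted

with no_warnings(action='ignore'):
    warnings.warn("silenced by the context manager")
```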
src/utils3d/_unified/__init__.py ADDED
@@ -0,0 +1,934 @@
1
+ # Auto-generated implementation redirecting to numpy/torch implementations
2
+ import sys
3
+ from typing import TYPE_CHECKING
4
+ import utils3d
5
+ from .._helpers import suppress_traceback
6
+
7
+ __all__ = ["triangulate",
8
+ "compute_face_normal",
9
+ "compute_face_angle",
10
+ "compute_vertex_normal",
11
+ "compute_vertex_normal_weighted",
12
+ "remove_corrupted_faces",
13
+ "merge_duplicate_vertices",
14
+ "remove_unreferenced_vertices",
15
+ "subdivide_mesh_simple",
16
+ "mesh_relations",
17
+ "flatten_mesh_indices",
18
+ "calc_quad_candidates",
19
+ "calc_quad_distortion",
20
+ "calc_quad_direction",
21
+ "calc_quad_smoothness",
22
+ "sovle_quad",
23
+ "sovle_quad_qp",
24
+ "tri_to_quad",
25
+ "sliding_window_1d",
26
+ "sliding_window_nd",
27
+ "sliding_window_2d",
28
+ "max_pool_1d",
29
+ "max_pool_2d",
30
+ "max_pool_nd",
31
+ "depth_edge",
32
+ "normals_edge",
33
+ "depth_aliasing",
34
+ "interpolate",
35
+ "image_scrcoord",
36
+ "image_uv",
37
+ "image_pixel_center",
38
+ "image_pixel",
39
+ "image_mesh",
40
+ "image_mesh_from_depth",
41
+ "depth_to_normals",
42
+ "points_to_normals",
43
+ "chessboard",
44
+ "cube",
45
+ "icosahedron",
46
+ "square",
47
+ "camera_frustum",
48
+ "perspective",
49
+ "perspective_from_fov",
50
+ "perspective_from_fov_xy",
51
+ "intrinsics_from_focal_center",
52
+ "intrinsics_from_fov",
53
+ "fov_to_focal",
54
+ "focal_to_fov",
55
+ "intrinsics_to_fov",
56
+ "view_look_at",
57
+ "extrinsics_look_at",
58
+ "perspective_to_intrinsics",
59
+ "perspective_to_near_far",
60
+ "intrinsics_to_perspective",
61
+ "extrinsics_to_view",
62
+ "view_to_extrinsics",
63
+ "normalize_intrinsics",
64
+ "crop_intrinsics",
65
+ "pixel_to_uv",
66
+ "pixel_to_ndc",
67
+ "uv_to_pixel",
68
+ "project_depth",
69
+ "depth_buffer_to_linear",
70
+ "unproject_cv",
71
+ "unproject_gl",
72
+ "project_cv",
73
+ "project_gl",
74
+ "quaternion_to_matrix",
75
+ "axis_angle_to_matrix",
76
+ "matrix_to_quaternion",
77
+ "extrinsics_to_essential",
78
+ "euler_axis_angle_rotation",
79
+ "euler_angles_to_matrix",
80
+ "skew_symmetric",
81
+ "rotation_matrix_from_vectors",
82
+ "ray_intersection",
83
+ "se3_matrix",
84
+ "slerp_quaternion",
85
+ "slerp_vector",
86
+ "lerp",
87
+ "lerp_se3_matrix",
88
+ "piecewise_lerp",
89
+ "piecewise_lerp_se3_matrix",
90
+ "apply_transform",
91
+ "linear_spline_interpolate",
92
+ "RastContext",
93
+ "rasterize_triangle_faces",
94
+ "rasterize_edges",
95
+ "texture",
96
+ "warp_image_by_depth",
97
+ "test_rasterization",
98
+ "compute_face_angles",
99
+ "compute_face_tbn",
100
+ "compute_vertex_tbn",
101
+ "laplacian",
102
+ "laplacian_smooth_mesh",
103
+ "taubin_smooth_mesh",
104
+ "laplacian_hc_smooth_mesh",
105
+ "get_rays",
106
+ "get_image_rays",
107
+ "get_mipnerf_cones",
108
+ "volume_rendering",
109
+ "bin_sample",
110
+ "importance_sample",
111
+ "nerf_render_rays",
112
+ "mipnerf_render_rays",
113
+ "nerf_render_view",
114
+ "mipnerf_render_view",
115
+ "InstantNGP",
116
+ "point_to_normal",
117
+ "depth_to_normal",
118
+ "masked_min",
119
+ "masked_max",
120
+ "bounding_rect",
121
+ "intrinsics_from_fov_xy",
122
+ "matrix_to_euler_angles",
123
+ "matrix_to_axis_angle",
124
+ "axis_angle_to_quaternion",
125
+ "quaternion_to_axis_angle",
126
+ "slerp",
127
+ "interpolate_extrinsics",
128
+ "interpolate_view",
129
+ "to4x4",
130
+ "rotation_matrix_2d",
131
+ "rotate_2d",
132
+ "translate_2d",
133
+ "scale_2d",
134
+ "apply_2d",
135
+ "warp_image_by_forward_flow"]
136
+
137
+ def _contains_tensor(obj):
138
+ if isinstance(obj, (list, tuple)):
139
+ return any(_contains_tensor(item) for item in obj)
140
+ elif isinstance(obj, dict):
141
+ return any(_contains_tensor(value) for value in obj.values())
142
+ else:
143
+ import torch
144
+ return isinstance(obj, torch.Tensor)
145
+
146
+
147
+ @suppress_traceback
148
+ def _call_based_on_args(fname, args, kwargs):
149
+ if 'torch' in sys.modules:
150
+ if any(_contains_tensor(arg) for arg in args) or any(_contains_tensor(v) for v in kwargs.values()):
151
+ fn = getattr(utils3d.torch, fname, None)
152
+ if fn is None:
153
+ raise NotImplementedError(f"Function {fname} has no torch implementation.")
154
+ return fn(*args, **kwargs)
155
+ fn = getattr(utils3d.numpy, fname, None)
156
+ if fn is None:
157
+ raise NotImplementedError(f"Function {fname} has no numpy implementation.")
158
+ return fn(*args, **kwargs)
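The effect of the dispatcher above, sketched below: the same unified entry point resolves to the NumPy implementation when no tensor is passed and to the PyTorch implementation when any argument (or keyword value) is a `torch.Tensor`. This assumes the vendored `src` directory is on `sys.path` so that `utils3d` is importable, and that both backends are installed.

```python
import torch
import utils3d

uv_np = utils3d.image_uv(width=4, height=3)          # no tensors -> numpy backend, returns np.ndarray
win_pt = utils3d.sliding_window_2d(                   # tensor argument -> torch backend
    torch.zeros(8, 8), (3, 3), 1, dim=(0, 1)
)
print(type(uv_np), type(win_pt))
```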
159
+
160
+
161
+ @suppress_traceback
162
+ def triangulate(*args, **kwargs):
163
+ if TYPE_CHECKING: # redirected to:
164
+ utils3d.numpy.triangulate, utils3d.torch.triangulate
165
+ return _call_based_on_args('triangulate', args, kwargs)
166
+
167
+ @suppress_traceback
168
+ def compute_face_normal(*args, **kwargs):
169
+ if TYPE_CHECKING: # redirected to:
170
+ utils3d.numpy.compute_face_normal, utils3d.torch.compute_face_normal
171
+ return _call_based_on_args('compute_face_normal', args, kwargs)
172
+
173
+ @suppress_traceback
174
+ def compute_face_angle(*args, **kwargs):
175
+ if TYPE_CHECKING: # redirected to:
176
+ utils3d.numpy.compute_face_angle, None
177
+ return _call_based_on_args('compute_face_angle', args, kwargs)
178
+
179
+ @suppress_traceback
180
+ def compute_vertex_normal(*args, **kwargs):
181
+ if TYPE_CHECKING: # redirected to:
182
+ utils3d.numpy.compute_vertex_normal, utils3d.torch.compute_vertex_normal
183
+ return _call_based_on_args('compute_vertex_normal', args, kwargs)
184
+
185
+ @suppress_traceback
186
+ def compute_vertex_normal_weighted(*args, **kwargs):
187
+ if TYPE_CHECKING: # redirected to:
188
+ utils3d.numpy.compute_vertex_normal_weighted, utils3d.torch.compute_vertex_normal_weighted
189
+ return _call_based_on_args('compute_vertex_normal_weighted', args, kwargs)
190
+
191
+ @suppress_traceback
192
+ def remove_corrupted_faces(*args, **kwargs):
193
+ if TYPE_CHECKING: # redirected to:
194
+ utils3d.numpy.remove_corrupted_faces, utils3d.torch.remove_corrupted_faces
195
+ return _call_based_on_args('remove_corrupted_faces', args, kwargs)
196
+
197
+ @suppress_traceback
198
+ def merge_duplicate_vertices(*args, **kwargs):
199
+ if TYPE_CHECKING: # redirected to:
200
+ utils3d.numpy.merge_duplicate_vertices, utils3d.torch.merge_duplicate_vertices
201
+ return _call_based_on_args('merge_duplicate_vertices', args, kwargs)
202
+
203
+ @suppress_traceback
204
+ def remove_unreferenced_vertices(*args, **kwargs):
205
+ if TYPE_CHECKING: # redirected to:
206
+ utils3d.numpy.remove_unreferenced_vertices, utils3d.torch.remove_unreferenced_vertices
207
+ return _call_based_on_args('remove_unreferenced_vertices', args, kwargs)
208
+
209
+ @suppress_traceback
210
+ def subdivide_mesh_simple(*args, **kwargs):
211
+ if TYPE_CHECKING: # redirected to:
212
+ utils3d.numpy.subdivide_mesh_simple, utils3d.torch.subdivide_mesh_simple
213
+ return _call_based_on_args('subdivide_mesh_simple', args, kwargs)
214
+
215
+ @suppress_traceback
216
+ def mesh_relations(*args, **kwargs):
217
+ if TYPE_CHECKING: # redirected to:
218
+ utils3d.numpy.mesh_relations, None
219
+ return _call_based_on_args('mesh_relations', args, kwargs)
220
+
221
+ @suppress_traceback
222
+ def flatten_mesh_indices(*args, **kwargs):
223
+ if TYPE_CHECKING: # redirected to:
224
+ utils3d.numpy.flatten_mesh_indices, None
225
+ return _call_based_on_args('flatten_mesh_indices', args, kwargs)
226
+
227
+ @suppress_traceback
228
+ def calc_quad_candidates(*args, **kwargs):
229
+ if TYPE_CHECKING: # redirected to:
230
+ utils3d.numpy.calc_quad_candidates, None
231
+ return _call_based_on_args('calc_quad_candidates', args, kwargs)
232
+
233
+ @suppress_traceback
234
+ def calc_quad_distortion(*args, **kwargs):
235
+ if TYPE_CHECKING: # redirected to:
236
+ utils3d.numpy.calc_quad_distortion, None
237
+ return _call_based_on_args('calc_quad_distortion', args, kwargs)
238
+
239
+ @suppress_traceback
240
+ def calc_quad_direction(*args, **kwargs):
241
+ if TYPE_CHECKING: # redirected to:
242
+ utils3d.numpy.calc_quad_direction, None
243
+ return _call_based_on_args('calc_quad_direction', args, kwargs)
244
+
245
+ @suppress_traceback
246
+ def calc_quad_smoothness(*args, **kwargs):
247
+ if TYPE_CHECKING: # redirected to:
248
+ utils3d.numpy.calc_quad_smoothness, None
249
+ return _call_based_on_args('calc_quad_smoothness', args, kwargs)
250
+
251
+ @suppress_traceback
252
+ def sovle_quad(*args, **kwargs):
253
+ if TYPE_CHECKING: # redirected to:
254
+ utils3d.numpy.sovle_quad, None
255
+ return _call_based_on_args('sovle_quad', args, kwargs)
256
+
257
+ @suppress_traceback
258
+ def sovle_quad_qp(*args, **kwargs):
259
+ if TYPE_CHECKING: # redirected to:
260
+ utils3d.numpy.sovle_quad_qp, None
261
+ return _call_based_on_args('sovle_quad_qp', args, kwargs)
262
+
263
+ @suppress_traceback
264
+ def tri_to_quad(*args, **kwargs):
265
+ if TYPE_CHECKING: # redirected to:
266
+ utils3d.numpy.tri_to_quad, None
267
+ return _call_based_on_args('tri_to_quad', args, kwargs)
268
+
269
+ @suppress_traceback
270
+ def sliding_window_1d(*args, **kwargs):
271
+ if TYPE_CHECKING: # redirected to:
272
+ utils3d.numpy.sliding_window_1d, utils3d.torch.sliding_window_1d
273
+ return _call_based_on_args('sliding_window_1d', args, kwargs)
274
+
275
+ @suppress_traceback
276
+ def sliding_window_nd(*args, **kwargs):
277
+ if TYPE_CHECKING: # redirected to:
278
+ utils3d.numpy.sliding_window_nd, utils3d.torch.sliding_window_nd
279
+ return _call_based_on_args('sliding_window_nd', args, kwargs)
280
+
281
+ @suppress_traceback
282
+ def sliding_window_2d(*args, **kwargs):
283
+ if TYPE_CHECKING: # redirected to:
284
+ utils3d.numpy.sliding_window_2d, utils3d.torch.sliding_window_2d
285
+ return _call_based_on_args('sliding_window_2d', args, kwargs)
286
+
287
+ @suppress_traceback
288
+ def max_pool_1d(*args, **kwargs):
289
+ if TYPE_CHECKING: # redirected to:
290
+ utils3d.numpy.max_pool_1d, None
291
+ return _call_based_on_args('max_pool_1d', args, kwargs)
292
+
293
+ @suppress_traceback
294
+ def max_pool_2d(*args, **kwargs):
295
+ if TYPE_CHECKING: # redirected to:
296
+ utils3d.numpy.max_pool_2d, None
297
+ return _call_based_on_args('max_pool_2d', args, kwargs)
298
+
299
+ @suppress_traceback
300
+ def max_pool_nd(*args, **kwargs):
301
+ if TYPE_CHECKING: # redirected to:
302
+ utils3d.numpy.max_pool_nd, None
303
+ return _call_based_on_args('max_pool_nd', args, kwargs)
304
+
305
+ @suppress_traceback
306
+ def depth_edge(*args, **kwargs):
307
+ if TYPE_CHECKING: # redirected to:
308
+ utils3d.numpy.depth_edge, utils3d.torch.depth_edge
309
+ return _call_based_on_args('depth_edge', args, kwargs)
310
+
311
+ @suppress_traceback
312
+ def normals_edge(*args, **kwargs):
313
+ if TYPE_CHECKING: # redirected to:
314
+ utils3d.numpy.normals_edge, None
315
+ return _call_based_on_args('normals_edge', args, kwargs)
316
+
317
+ @suppress_traceback
318
+ def depth_aliasing(*args, **kwargs):
319
+ if TYPE_CHECKING: # redirected to:
320
+ utils3d.numpy.depth_aliasing, utils3d.torch.depth_aliasing
321
+ return _call_based_on_args('depth_aliasing', args, kwargs)
322
+
323
+ @suppress_traceback
324
+ def interpolate(*args, **kwargs):
325
+ if TYPE_CHECKING: # redirected to:
326
+ utils3d.numpy.interpolate, None
327
+ return _call_based_on_args('interpolate', args, kwargs)
328
+
329
+ @suppress_traceback
330
+ def image_scrcoord(*args, **kwargs):
331
+ if TYPE_CHECKING: # redirected to:
332
+ utils3d.numpy.image_scrcoord, None
333
+ return _call_based_on_args('image_scrcoord', args, kwargs)
334
+
335
+ @suppress_traceback
336
+ def image_uv(*args, **kwargs):
337
+ if TYPE_CHECKING: # redirected to:
338
+ utils3d.numpy.image_uv, utils3d.torch.image_uv
339
+ return _call_based_on_args('image_uv', args, kwargs)
340
+
341
+ @suppress_traceback
342
+ def image_pixel_center(*args, **kwargs):
343
+ if TYPE_CHECKING: # redirected to:
344
+ utils3d.numpy.image_pixel_center, utils3d.torch.image_pixel_center
345
+ return _call_based_on_args('image_pixel_center', args, kwargs)
346
+
347
+ @suppress_traceback
348
+ def image_pixel(*args, **kwargs):
349
+ if TYPE_CHECKING: # redirected to:
350
+ utils3d.numpy.image_pixel, None
351
+ return _call_based_on_args('image_pixel', args, kwargs)
352
+
353
+ @suppress_traceback
354
+ def image_mesh(*args, **kwargs):
355
+ if TYPE_CHECKING: # redirected to:
356
+ utils3d.numpy.image_mesh, utils3d.torch.image_mesh
357
+ return _call_based_on_args('image_mesh', args, kwargs)
358
+
359
+ @suppress_traceback
360
+ def image_mesh_from_depth(*args, **kwargs):
361
+ if TYPE_CHECKING: # redirected to:
362
+ utils3d.numpy.image_mesh_from_depth, utils3d.torch.image_mesh_from_depth
363
+ return _call_based_on_args('image_mesh_from_depth', args, kwargs)
364
+
365
+ @suppress_traceback
366
+ def depth_to_normals(*args, **kwargs):
367
+ if TYPE_CHECKING: # redirected to:
368
+ utils3d.numpy.depth_to_normals, None
369
+ return _call_based_on_args('depth_to_normals', args, kwargs)
370
+
371
+ @suppress_traceback
372
+ def points_to_normals(*args, **kwargs):
373
+ if TYPE_CHECKING: # redirected to:
374
+ utils3d.numpy.points_to_normals, None
375
+ return _call_based_on_args('points_to_normals', args, kwargs)
376
+
377
+ @suppress_traceback
378
+ def chessboard(*args, **kwargs):
379
+ if TYPE_CHECKING: # redirected to:
380
+ utils3d.numpy.chessboard, utils3d.torch.chessboard
381
+ return _call_based_on_args('chessboard', args, kwargs)
382
+
383
+ @suppress_traceback
384
+ def cube(*args, **kwargs):
385
+ if TYPE_CHECKING: # redirected to:
386
+ utils3d.numpy.cube, None
387
+ return _call_based_on_args('cube', args, kwargs)
388
+
389
+ @suppress_traceback
390
+ def icosahedron(*args, **kwargs):
391
+ if TYPE_CHECKING: # redirected to:
392
+ utils3d.numpy.icosahedron, None
393
+ return _call_based_on_args('icosahedron', args, kwargs)
394
+
395
+ @suppress_traceback
396
+ def square(*args, **kwargs):
397
+ if TYPE_CHECKING: # redirected to:
398
+ utils3d.numpy.square, None
399
+ return _call_based_on_args('square', args, kwargs)
400
+
401
+ @suppress_traceback
402
+ def camera_frustum(*args, **kwargs):
403
+ if TYPE_CHECKING: # redirected to:
404
+ utils3d.numpy.camera_frustum, None
405
+ return _call_based_on_args('camera_frustum', args, kwargs)
406
+
407
+ @suppress_traceback
408
+ def perspective(*args, **kwargs):
409
+ if TYPE_CHECKING: # redirected to:
410
+ utils3d.numpy.perspective, utils3d.torch.perspective
411
+ return _call_based_on_args('perspective', args, kwargs)
412
+
413
+ @suppress_traceback
414
+ def perspective_from_fov(*args, **kwargs):
415
+ if TYPE_CHECKING: # redirected to:
416
+ utils3d.numpy.perspective_from_fov, utils3d.torch.perspective_from_fov
417
+ return _call_based_on_args('perspective_from_fov', args, kwargs)
418
+
419
+ @suppress_traceback
420
+ def perspective_from_fov_xy(*args, **kwargs):
421
+ if TYPE_CHECKING: # redirected to:
422
+ utils3d.numpy.perspective_from_fov_xy, utils3d.torch.perspective_from_fov_xy
423
+ return _call_based_on_args('perspective_from_fov_xy', args, kwargs)
424
+
425
+ @suppress_traceback
426
+ def intrinsics_from_focal_center(*args, **kwargs):
427
+ if TYPE_CHECKING: # redirected to:
428
+ utils3d.numpy.intrinsics_from_focal_center, utils3d.torch.intrinsics_from_focal_center
429
+ return _call_based_on_args('intrinsics_from_focal_center', args, kwargs)
430
+
431
+ @suppress_traceback
432
+ def intrinsics_from_fov(*args, **kwargs):
433
+ if TYPE_CHECKING: # redirected to:
434
+ utils3d.numpy.intrinsics_from_fov, utils3d.torch.intrinsics_from_fov
435
+ return _call_based_on_args('intrinsics_from_fov', args, kwargs)
436
+
437
+ @suppress_traceback
438
+ def fov_to_focal(*args, **kwargs):
439
+ if TYPE_CHECKING: # redirected to:
440
+ utils3d.numpy.fov_to_focal, None
441
+ return _call_based_on_args('fov_to_focal', args, kwargs)
442
+
443
+ @suppress_traceback
444
+ def focal_to_fov(*args, **kwargs):
445
+ if TYPE_CHECKING: # redirected to:
446
+ utils3d.numpy.focal_to_fov, None
447
+ return _call_based_on_args('focal_to_fov', args, kwargs)
448
+
449
+ @suppress_traceback
450
+ def intrinsics_to_fov(*args, **kwargs):
451
+ if TYPE_CHECKING: # redirected to:
452
+ utils3d.numpy.intrinsics_to_fov, None
453
+ return _call_based_on_args('intrinsics_to_fov', args, kwargs)
454
+
455
+ @suppress_traceback
456
+ def view_look_at(*args, **kwargs):
457
+ if TYPE_CHECKING: # redirected to:
458
+ utils3d.numpy.view_look_at, utils3d.torch.view_look_at
459
+ return _call_based_on_args('view_look_at', args, kwargs)
460
+
461
+ @suppress_traceback
462
+ def extrinsics_look_at(*args, **kwargs):
463
+ if TYPE_CHECKING: # redirected to:
464
+ utils3d.numpy.extrinsics_look_at, utils3d.torch.extrinsics_look_at
465
+ return _call_based_on_args('extrinsics_look_at', args, kwargs)
466
+
467
+ @suppress_traceback
468
+ def perspective_to_intrinsics(*args, **kwargs):
469
+ if TYPE_CHECKING: # redirected to:
470
+ utils3d.numpy.perspective_to_intrinsics, utils3d.torch.perspective_to_intrinsics
471
+ return _call_based_on_args('perspective_to_intrinsics', args, kwargs)
472
+
473
+ @suppress_traceback
474
+ def perspective_to_near_far(*args, **kwargs):
475
+ if TYPE_CHECKING: # redirected to:
476
+ utils3d.numpy.perspective_to_near_far, None
477
+ return _call_based_on_args('perspective_to_near_far', args, kwargs)
478
+
479
+ @suppress_traceback
480
+ def intrinsics_to_perspective(*args, **kwargs):
481
+ if TYPE_CHECKING: # redirected to:
482
+ utils3d.numpy.intrinsics_to_perspective, utils3d.torch.intrinsics_to_perspective
483
+ return _call_based_on_args('intrinsics_to_perspective', args, kwargs)
484
+
485
+ @suppress_traceback
486
+ def extrinsics_to_view(*args, **kwargs):
487
+ if TYPE_CHECKING: # redirected to:
488
+ utils3d.numpy.extrinsics_to_view, utils3d.torch.extrinsics_to_view
489
+ return _call_based_on_args('extrinsics_to_view', args, kwargs)
490
+
491
+ @suppress_traceback
492
+ def view_to_extrinsics(*args, **kwargs):
493
+ if TYPE_CHECKING: # redirected to:
494
+ utils3d.numpy.view_to_extrinsics, utils3d.torch.view_to_extrinsics
495
+ return _call_based_on_args('view_to_extrinsics', args, kwargs)
496
+
497
+ @suppress_traceback
498
+ def normalize_intrinsics(*args, **kwargs):
499
+ if TYPE_CHECKING: # redirected to:
500
+ utils3d.numpy.normalize_intrinsics, utils3d.torch.normalize_intrinsics
501
+ return _call_based_on_args('normalize_intrinsics', args, kwargs)
502
+
503
+ @suppress_traceback
504
+ def crop_intrinsics(*args, **kwargs):
505
+ if TYPE_CHECKING: # redirected to:
506
+ utils3d.numpy.crop_intrinsics, utils3d.torch.crop_intrinsics
507
+ return _call_based_on_args('crop_intrinsics', args, kwargs)
508
+
509
+ @suppress_traceback
510
+ def pixel_to_uv(*args, **kwargs):
511
+ if TYPE_CHECKING: # redirected to:
512
+ utils3d.numpy.pixel_to_uv, utils3d.torch.pixel_to_uv
513
+ return _call_based_on_args('pixel_to_uv', args, kwargs)
514
+
515
+ @suppress_traceback
516
+ def pixel_to_ndc(*args, **kwargs):
517
+ if TYPE_CHECKING: # redirected to:
518
+ utils3d.numpy.pixel_to_ndc, utils3d.torch.pixel_to_ndc
519
+ return _call_based_on_args('pixel_to_ndc', args, kwargs)
520
+
521
+ @suppress_traceback
522
+ def uv_to_pixel(*args, **kwargs):
523
+ if TYPE_CHECKING: # redirected to:
524
+ utils3d.numpy.uv_to_pixel, utils3d.torch.uv_to_pixel
525
+ return _call_based_on_args('uv_to_pixel', args, kwargs)
526
+
527
+ @suppress_traceback
528
+ def project_depth(*args, **kwargs):
529
+ if TYPE_CHECKING: # redirected to:
530
+ utils3d.numpy.project_depth, utils3d.torch.project_depth
531
+ return _call_based_on_args('project_depth', args, kwargs)
532
+
533
+ @suppress_traceback
534
+ def depth_buffer_to_linear(*args, **kwargs):
535
+ if TYPE_CHECKING: # redirected to:
536
+ utils3d.numpy.depth_buffer_to_linear, utils3d.torch.depth_buffer_to_linear
537
+ return _call_based_on_args('depth_buffer_to_linear', args, kwargs)
538
+
539
+ @suppress_traceback
540
+ def unproject_cv(*args, **kwargs):
541
+ if TYPE_CHECKING: # redirected to:
542
+ utils3d.numpy.unproject_cv, utils3d.torch.unproject_cv
543
+ return _call_based_on_args('unproject_cv', args, kwargs)
544
+
545
+ @suppress_traceback
546
+ def unproject_gl(*args, **kwargs):
547
+ if TYPE_CHECKING: # redirected to:
548
+ utils3d.numpy.unproject_gl, utils3d.torch.unproject_gl
549
+ return _call_based_on_args('unproject_gl', args, kwargs)
550
+
551
+ @suppress_traceback
552
+ def project_cv(*args, **kwargs):
553
+ if TYPE_CHECKING: # redirected to:
554
+ utils3d.numpy.project_cv, utils3d.torch.project_cv
555
+ return _call_based_on_args('project_cv', args, kwargs)
556
+
557
+ @suppress_traceback
558
+ def project_gl(*args, **kwargs):
559
+ if TYPE_CHECKING: # redirected to:
560
+ utils3d.numpy.project_gl, utils3d.torch.project_gl
561
+ return _call_based_on_args('project_gl', args, kwargs)
562
+
563
+ @suppress_traceback
564
+ def quaternion_to_matrix(*args, **kwargs):
565
+ if TYPE_CHECKING: # redirected to:
566
+ utils3d.numpy.quaternion_to_matrix, utils3d.torch.quaternion_to_matrix
567
+ return _call_based_on_args('quaternion_to_matrix', args, kwargs)
568
+
569
+ @suppress_traceback
570
+ def axis_angle_to_matrix(*args, **kwargs):
571
+ if TYPE_CHECKING: # redirected to:
572
+ utils3d.numpy.axis_angle_to_matrix, utils3d.torch.axis_angle_to_matrix
573
+ return _call_based_on_args('axis_angle_to_matrix', args, kwargs)
574
+
575
+ @suppress_traceback
576
+ def matrix_to_quaternion(*args, **kwargs):
577
+ if TYPE_CHECKING: # redirected to:
578
+ utils3d.numpy.matrix_to_quaternion, utils3d.torch.matrix_to_quaternion
579
+ return _call_based_on_args('matrix_to_quaternion', args, kwargs)
580
+
581
+ @suppress_traceback
582
+ def extrinsics_to_essential(*args, **kwargs):
583
+ if TYPE_CHECKING: # redirected to:
584
+ utils3d.numpy.extrinsics_to_essential, utils3d.torch.extrinsics_to_essential
585
+ return _call_based_on_args('extrinsics_to_essential', args, kwargs)
586
+
587
+ @suppress_traceback
588
+ def euler_axis_angle_rotation(*args, **kwargs):
589
+ if TYPE_CHECKING: # redirected to:
590
+ utils3d.numpy.euler_axis_angle_rotation, utils3d.torch.euler_axis_angle_rotation
591
+ return _call_based_on_args('euler_axis_angle_rotation', args, kwargs)
592
+
593
+ @suppress_traceback
594
+ def euler_angles_to_matrix(*args, **kwargs):
595
+ if TYPE_CHECKING: # redirected to:
596
+ utils3d.numpy.euler_angles_to_matrix, utils3d.torch.euler_angles_to_matrix
597
+ return _call_based_on_args('euler_angles_to_matrix', args, kwargs)
598
+
599
+ @suppress_traceback
600
+ def skew_symmetric(*args, **kwargs):
601
+ if TYPE_CHECKING: # redirected to:
602
+ utils3d.numpy.skew_symmetric, utils3d.torch.skew_symmetric
603
+ return _call_based_on_args('skew_symmetric', args, kwargs)
604
+
605
+ @suppress_traceback
606
+ def rotation_matrix_from_vectors(*args, **kwargs):
607
+ if TYPE_CHECKING: # redirected to:
608
+ utils3d.numpy.rotation_matrix_from_vectors, utils3d.torch.rotation_matrix_from_vectors
609
+ return _call_based_on_args('rotation_matrix_from_vectors', args, kwargs)
610
+
611
+ @suppress_traceback
612
+ def ray_intersection(*args, **kwargs):
613
+ if TYPE_CHECKING: # redirected to:
614
+ utils3d.numpy.ray_intersection, None
615
+ return _call_based_on_args('ray_intersection', args, kwargs)
616
+
617
+ @suppress_traceback
618
+ def se3_matrix(*args, **kwargs):
619
+ if TYPE_CHECKING: # redirected to:
620
+ utils3d.numpy.se3_matrix, None
621
+ return _call_based_on_args('se3_matrix', args, kwargs)
622
+
623
+ @suppress_traceback
624
+ def slerp_quaternion(*args, **kwargs):
625
+ if TYPE_CHECKING: # redirected to:
626
+ utils3d.numpy.slerp_quaternion, None
627
+ return _call_based_on_args('slerp_quaternion', args, kwargs)
628
+
629
+ @suppress_traceback
630
+ def slerp_vector(*args, **kwargs):
631
+ if TYPE_CHECKING: # redirected to:
632
+ utils3d.numpy.slerp_vector, None
633
+ return _call_based_on_args('slerp_vector', args, kwargs)
634
+
635
+ @suppress_traceback
636
+ def lerp(*args, **kwargs):
637
+ if TYPE_CHECKING: # redirected to:
638
+ utils3d.numpy.lerp, None
639
+ return _call_based_on_args('lerp', args, kwargs)
640
+
641
+ @suppress_traceback
642
+ def lerp_se3_matrix(*args, **kwargs):
643
+ if TYPE_CHECKING: # redirected to:
644
+ utils3d.numpy.lerp_se3_matrix, None
645
+ return _call_based_on_args('lerp_se3_matrix', args, kwargs)
646
+
647
+ @suppress_traceback
648
+ def piecewise_lerp(*args, **kwargs):
649
+ if TYPE_CHECKING: # redirected to:
650
+ utils3d.numpy.piecewise_lerp, None
651
+ return _call_based_on_args('piecewise_lerp', args, kwargs)
652
+
653
+ @suppress_traceback
654
+ def piecewise_lerp_se3_matrix(*args, **kwargs):
655
+ if TYPE_CHECKING: # redirected to:
656
+ utils3d.numpy.piecewise_lerp_se3_matrix, None
657
+ return _call_based_on_args('piecewise_lerp_se3_matrix', args, kwargs)
658
+
659
+ @suppress_traceback
660
+ def apply_transform(*args, **kwargs):
661
+ if TYPE_CHECKING: # redirected to:
662
+ utils3d.numpy.apply_transform, None
663
+ return _call_based_on_args('apply_transform', args, kwargs)
664
+
665
+ @suppress_traceback
666
+ def linear_spline_interpolate(*args, **kwargs):
667
+ if TYPE_CHECKING: # redirected to:
668
+ utils3d.numpy.linear_spline_interpolate, None
669
+ return _call_based_on_args('linear_spline_interpolate', args, kwargs)
670
+
671
+ @suppress_traceback
672
+ def RastContext(*args, **kwargs):
673
+ if TYPE_CHECKING: # redirected to:
674
+ utils3d.numpy.RastContext, utils3d.torch.RastContext
675
+ return _call_based_on_args('RastContext', args, kwargs)
676
+
677
+ @suppress_traceback
678
+ def rasterize_triangle_faces(*args, **kwargs):
679
+ if TYPE_CHECKING: # redirected to:
680
+ utils3d.numpy.rasterize_triangle_faces, utils3d.torch.rasterize_triangle_faces
681
+ return _call_based_on_args('rasterize_triangle_faces', args, kwargs)
682
+
683
+ @suppress_traceback
684
+ def rasterize_edges(*args, **kwargs):
685
+ if TYPE_CHECKING: # redirected to:
686
+ utils3d.numpy.rasterize_edges, None
687
+ return _call_based_on_args('rasterize_edges', args, kwargs)
688
+
689
+ @suppress_traceback
690
+ def texture(*args, **kwargs):
691
+ if TYPE_CHECKING: # redirected to:
692
+ utils3d.numpy.texture, None
693
+ return _call_based_on_args('texture', args, kwargs)
694
+
695
+ @suppress_traceback
696
+ def warp_image_by_depth(*args, **kwargs):
697
+ if TYPE_CHECKING: # redirected to:
698
+ utils3d.numpy.warp_image_by_depth, utils3d.torch.warp_image_by_depth
699
+ return _call_based_on_args('warp_image_by_depth', args, kwargs)
700
+
701
+ @suppress_traceback
702
+ def test_rasterization(*args, **kwargs):
703
+ if TYPE_CHECKING: # redirected to:
704
+ utils3d.numpy.test_rasterization, None
705
+ return _call_based_on_args('test_rasterization', args, kwargs)
706
+
707
+ @suppress_traceback
708
+ def compute_face_angles(*args, **kwargs):
709
+ if TYPE_CHECKING: # redirected to:
710
+ None, utils3d.torch.compute_face_angles
711
+ return _call_based_on_args('compute_face_angles', args, kwargs)
712
+
713
+ @suppress_traceback
714
+ def compute_face_tbn(*args, **kwargs):
715
+ if TYPE_CHECKING: # redirected to:
716
+ None, utils3d.torch.compute_face_tbn
717
+ return _call_based_on_args('compute_face_tbn', args, kwargs)
718
+
719
+ @suppress_traceback
720
+ def compute_vertex_tbn(*args, **kwargs):
721
+ if TYPE_CHECKING: # redirected to:
722
+ None, utils3d.torch.compute_vertex_tbn
723
+ return _call_based_on_args('compute_vertex_tbn', args, kwargs)
724
+
725
+ @suppress_traceback
726
+ def laplacian(*args, **kwargs):
727
+ if TYPE_CHECKING: # redirected to:
728
+ None, utils3d.torch.laplacian
729
+ return _call_based_on_args('laplacian', args, kwargs)
730
+
731
+ @suppress_traceback
732
+ def laplacian_smooth_mesh(*args, **kwargs):
733
+ if TYPE_CHECKING: # redirected to:
734
+ None, utils3d.torch.laplacian_smooth_mesh
735
+ return _call_based_on_args('laplacian_smooth_mesh', args, kwargs)
736
+
737
+ @suppress_traceback
738
+ def taubin_smooth_mesh(*args, **kwargs):
739
+ if TYPE_CHECKING: # redirected to:
740
+ None, utils3d.torch.taubin_smooth_mesh
741
+ return _call_based_on_args('taubin_smooth_mesh', args, kwargs)
742
+
743
+ @suppress_traceback
744
+ def laplacian_hc_smooth_mesh(*args, **kwargs):
745
+ if TYPE_CHECKING: # redirected to:
746
+ None, utils3d.torch.laplacian_hc_smooth_mesh
747
+ return _call_based_on_args('laplacian_hc_smooth_mesh', args, kwargs)
748
+
749
+ @suppress_traceback
750
+ def get_rays(*args, **kwargs):
751
+ if TYPE_CHECKING: # redirected to:
752
+ None, utils3d.torch.get_rays
753
+ return _call_based_on_args('get_rays', args, kwargs)
754
+
755
+ @suppress_traceback
756
+ def get_image_rays(*args, **kwargs):
757
+ if TYPE_CHECKING: # redirected to:
758
+ None, utils3d.torch.get_image_rays
759
+ return _call_based_on_args('get_image_rays', args, kwargs)
760
+
761
+ @suppress_traceback
762
+ def get_mipnerf_cones(*args, **kwargs):
763
+ if TYPE_CHECKING: # redirected to:
764
+ None, utils3d.torch.get_mipnerf_cones
765
+ return _call_based_on_args('get_mipnerf_cones', args, kwargs)
766
+
767
+ @suppress_traceback
768
+ def volume_rendering(*args, **kwargs):
769
+ if TYPE_CHECKING: # redirected to:
770
+ None, utils3d.torch.volume_rendering
771
+ return _call_based_on_args('volume_rendering', args, kwargs)
772
+
773
+ @suppress_traceback
774
+ def bin_sample(*args, **kwargs):
775
+ if TYPE_CHECKING: # redirected to:
776
+ None, utils3d.torch.bin_sample
777
+ return _call_based_on_args('bin_sample', args, kwargs)
778
+
779
+ @suppress_traceback
780
+ def importance_sample(*args, **kwargs):
781
+ if TYPE_CHECKING: # redirected to:
782
+ None, utils3d.torch.importance_sample
783
+ return _call_based_on_args('importance_sample', args, kwargs)
784
+
785
+ @suppress_traceback
786
+ def nerf_render_rays(*args, **kwargs):
787
+ if TYPE_CHECKING: # redirected to:
788
+ None, utils3d.torch.nerf_render_rays
789
+ return _call_based_on_args('nerf_render_rays', args, kwargs)
790
+
791
+ @suppress_traceback
792
+ def mipnerf_render_rays(*args, **kwargs):
793
+ if TYPE_CHECKING: # redirected to:
794
+ None, utils3d.torch.mipnerf_render_rays
795
+ return _call_based_on_args('mipnerf_render_rays', args, kwargs)
796
+
797
+ @suppress_traceback
798
+ def nerf_render_view(*args, **kwargs):
799
+ if TYPE_CHECKING: # redirected to:
800
+ None, utils3d.torch.nerf_render_view
801
+ return _call_based_on_args('nerf_render_view', args, kwargs)
802
+
803
+ @suppress_traceback
804
+ def mipnerf_render_view(*args, **kwargs):
805
+ if TYPE_CHECKING: # redirected to:
806
+ None, utils3d.torch.mipnerf_render_view
807
+ return _call_based_on_args('mipnerf_render_view', args, kwargs)
808
+
809
+ @suppress_traceback
810
+ def InstantNGP(*args, **kwargs):
811
+ if TYPE_CHECKING: # redirected to:
812
+ None, utils3d.torch.InstantNGP
813
+ return _call_based_on_args('InstantNGP', args, kwargs)
814
+
815
+ @suppress_traceback
816
+ def point_to_normal(*args, **kwargs):
817
+ if TYPE_CHECKING: # redirected to:
818
+ None, utils3d.torch.point_to_normal
819
+ return _call_based_on_args('point_to_normal', args, kwargs)
820
+
821
+ @suppress_traceback
822
+ def depth_to_normal(*args, **kwargs):
823
+ if TYPE_CHECKING: # redirected to:
824
+ None, utils3d.torch.depth_to_normal
825
+ return _call_based_on_args('depth_to_normal', args, kwargs)
826
+
827
+ @suppress_traceback
828
+ def masked_min(*args, **kwargs):
829
+ if TYPE_CHECKING: # redirected to:
830
+ None, utils3d.torch.masked_min
831
+ return _call_based_on_args('masked_min', args, kwargs)
832
+
833
+ @suppress_traceback
834
+ def masked_max(*args, **kwargs):
835
+ if TYPE_CHECKING: # redirected to:
836
+ None, utils3d.torch.masked_max
837
+ return _call_based_on_args('masked_max', args, kwargs)
838
+
839
+ @suppress_traceback
840
+ def bounding_rect(*args, **kwargs):
841
+ if TYPE_CHECKING: # redirected to:
842
+ None, utils3d.torch.bounding_rect
843
+ return _call_based_on_args('bounding_rect', args, kwargs)
844
+
845
+ @suppress_traceback
846
+ def intrinsics_from_fov_xy(*args, **kwargs):
847
+ if TYPE_CHECKING: # redirected to:
848
+ None, utils3d.torch.intrinsics_from_fov_xy
849
+ return _call_based_on_args('intrinsics_from_fov_xy', args, kwargs)
850
+
851
+ @suppress_traceback
852
+ def matrix_to_euler_angles(*args, **kwargs):
853
+ if TYPE_CHECKING: # redirected to:
854
+ None, utils3d.torch.matrix_to_euler_angles
855
+ return _call_based_on_args('matrix_to_euler_angles', args, kwargs)
856
+
857
+ @suppress_traceback
858
+ def matrix_to_axis_angle(*args, **kwargs):
859
+ if TYPE_CHECKING: # redirected to:
860
+ None, utils3d.torch.matrix_to_axis_angle
861
+ return _call_based_on_args('matrix_to_axis_angle', args, kwargs)
862
+
863
+ @suppress_traceback
864
+ def axis_angle_to_quaternion(*args, **kwargs):
865
+ if TYPE_CHECKING: # redirected to:
866
+ None, utils3d.torch.axis_angle_to_quaternion
867
+ return _call_based_on_args('axis_angle_to_quaternion', args, kwargs)
868
+
869
+ @suppress_traceback
870
+ def quaternion_to_axis_angle(*args, **kwargs):
871
+ if TYPE_CHECKING: # redirected to:
872
+ None, utils3d.torch.quaternion_to_axis_angle
873
+ return _call_based_on_args('quaternion_to_axis_angle', args, kwargs)
874
+
875
+ @suppress_traceback
876
+ def slerp(*args, **kwargs):
877
+ if TYPE_CHECKING: # redirected to:
878
+ None, utils3d.torch.slerp
879
+ return _call_based_on_args('slerp', args, kwargs)
880
+
881
+ @suppress_traceback
882
+ def interpolate_extrinsics(*args, **kwargs):
883
+ if TYPE_CHECKING: # redirected to:
884
+ None, utils3d.torch.interpolate_extrinsics
885
+ return _call_based_on_args('interpolate_extrinsics', args, kwargs)
886
+
887
+ @suppress_traceback
888
+ def interpolate_view(*args, **kwargs):
889
+ if TYPE_CHECKING: # redirected to:
890
+ None, utils3d.torch.interpolate_view
891
+ return _call_based_on_args('interpolate_view', args, kwargs)
892
+
893
+ @suppress_traceback
894
+ def to4x4(*args, **kwargs):
895
+ if TYPE_CHECKING: # redirected to:
896
+ None, utils3d.torch.to4x4
897
+ return _call_based_on_args('to4x4', args, kwargs)
898
+
899
+ @suppress_traceback
900
+ def rotation_matrix_2d(*args, **kwargs):
901
+ if TYPE_CHECKING: # redirected to:
902
+ None, utils3d.torch.rotation_matrix_2d
903
+ return _call_based_on_args('rotation_matrix_2d', args, kwargs)
904
+
905
+ @suppress_traceback
906
+ def rotate_2d(*args, **kwargs):
907
+ if TYPE_CHECKING: # redirected to:
908
+ None, utils3d.torch.rotate_2d
909
+ return _call_based_on_args('rotate_2d', args, kwargs)
910
+
911
+ @suppress_traceback
912
+ def translate_2d(*args, **kwargs):
913
+ if TYPE_CHECKING: # redirected to:
914
+ None, utils3d.torch.translate_2d
915
+ return _call_based_on_args('translate_2d', args, kwargs)
916
+
917
+ @suppress_traceback
918
+ def scale_2d(*args, **kwargs):
919
+ if TYPE_CHECKING: # redirected to:
920
+ None, utils3d.torch.scale_2d
921
+ return _call_based_on_args('scale_2d', args, kwargs)
922
+
923
+ @suppress_traceback
924
+ def apply_2d(*args, **kwargs):
925
+ if TYPE_CHECKING: # redirected to:
926
+ None, utils3d.torch.apply_2d
927
+ return _call_based_on_args('apply_2d', args, kwargs)
928
+
929
+ @suppress_traceback
930
+ def warp_image_by_forward_flow(*args, **kwargs):
931
+ if TYPE_CHECKING: # redirected to:
932
+ None, utils3d.torch.warp_image_by_forward_flow
933
+ return _call_based_on_args('warp_image_by_forward_flow', args, kwargs)
934
+
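Note on the wrappers above: at type-checking time each function only names its NumPy and PyTorch counterparts, and at run time everything is forwarded to _call_based_on_args, which picks a backend from the argument types (entries listed as None have no implementation for that backend). A minimal, self-contained sketch of this dispatch pattern (the names _numpy_normalize and registry below are illustrative, not part of the library):

    import numpy as np

    def _numpy_normalize(x: np.ndarray) -> np.ndarray:
        # Stand-in for a utils3d.numpy.* implementation.
        return x / np.linalg.norm(x, axis=-1, keepdims=True)

    registry = {'numpy': {'normalize': _numpy_normalize}, 'torch': {}}

    def call_based_on_args_sketch(name, args, kwargs):
        # Route to the torch backend if any argument is a torch.Tensor, else to numpy.
        backend = 'numpy'
        try:
            import torch
            if any(isinstance(a, torch.Tensor) for a in (*args, *kwargs.values())):
                backend = 'torch'
        except ImportError:
            pass
        return registry[backend][name](*args, **kwargs)

    print(call_based_on_args_sketch('normalize', (np.array([3.0, 4.0]),), {}))  # [0.6 0.8]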
src/utils3d/_unified/__init__.pyi ADDED
The diff for this file is too large to render. See raw diff
 
src/utils3d/io/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .obj import *
2
+ from .colmap import *
3
+ from .ply import *
src/utils3d/io/colmap.py ADDED
@@ -0,0 +1,139 @@
1
+ from typing import *
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ from scipy.spatial.transform import Rotation
6
+
7
+
8
+ __all__ = ['read_extrinsics_from_colmap', 'read_intrinsics_from_colmap', 'write_extrinsics_as_colmap', 'write_intrinsics_as_colmap']
9
+
10
+
11
+ def write_extrinsics_as_colmap(file: Union[str, Path], extrinsics: np.ndarray, image_names: Union[str, List[str]] = 'image_{i:04d}.png', camera_ids: List[int] = None):
12
+ """
13
+ Write extrinsics to colmap `images.txt` file.
14
+ Args:
15
+ file: Path to `images.txt` file.
16
+ extrinsics: (N, 4, 4) array of extrinsics.
17
+ image_names: str or List of str, image names. Length is N.
18
+ If str, it should be a format string with `i` as the index. (i starts from 1, in correspondence with IMAGE_ID in colmap)
19
+ camera_ids: List of int, camera ids. Length is N.
20
+ If None, it will be set to [1, 2, ..., N].
21
+ """
22
+ assert extrinsics.shape[1:] == (4, 4) and extrinsics.ndim == 3 or extrinsics.shape == (4, 4)
23
+ if extrinsics.ndim == 2:
24
+ extrinsics = extrinsics[np.newaxis, ...]
25
+ quats = Rotation.from_matrix(extrinsics[:, :3, :3]).as_quat()
26
+ trans = extrinsics[:, :3, 3]
27
+ if camera_ids is None:
28
+ camera_ids = list(range(1, len(extrinsics) + 1))
29
+ if isinstance(image_names, str):
30
+ image_names = [image_names.format(i=i) for i in range(1, len(extrinsics) + 1)]
31
+ assert len(extrinsics) == len(image_names) == len(camera_ids), \
32
+ f'Number of extrinsics ({len(extrinsics)}), image_names ({len(image_names)}), and camera_ids ({len(camera_ids)}) must be the same'
33
+ with open(file, 'w') as fp:
34
+ print("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME", file=fp)
35
+ for i, (quat, t, name, camera_id) in enumerate(zip(quats.tolist(), trans.tolist(), image_names, camera_ids)):
36
+ # Colmap has wxyz order while scipy.spatial.transform.Rotation has xyzw order.
37
+ qx, qy, qz, qw = quat
38
+ tx, ty, tz = t
39
+ print(f'{i + 1} {qw:f} {qx:f} {qy:f} {qz:f} {tx:f} {ty:f} {tz:f} {camera_id:d} {name}', file=fp)
40
+ print()
41
+
42
+
43
+ def write_intrinsics_as_colmap(file: Union[str, Path], intrinsics: np.ndarray, width: int, height: int, normalized: bool = False):
44
+ """
45
+ Write intrinsics to colmap `cameras.txt` file. Currently only supports the PINHOLE model (no distortion).
46
+ Args:
47
+ file: Path to `cameras.txt` file.
48
+ intrinsics: (N, 3, 3) array of intrinsics.
49
+ width: Image width.
50
+ height: Image height.
51
+ normalized: Whether the intrinsics are normalized. If True, they will be unnormalized (scaled back to pixel units) before writing.
52
+ """
53
+ assert intrinsics.shape[1:] == (3, 3) and intrinsics.ndim == 3 or intrinsics.shape == (3, 3)
54
+ if intrinsics.ndim == 2:
55
+ intrinsics = intrinsics[np.newaxis, ...]
56
+ if normalized:
57
+ intrinsics = intrinsics * np.array([width, height, 1])[:, None]
58
+ with open(file, 'w') as fp:
59
+ print("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]", file=fp)
60
+ for i, intr in enumerate(intrinsics):
61
+ fx, fy, cx, cy = intr[0, 0], intr[1, 1], intr[0, 2], intr[1, 2]
62
+ print(f'{i + 1} PINHOLE {width:d} {height:d} {fx:f} {fy:f} {cx:f} {cy:f}', file=fp)
63
+
64
+
65
+ def read_extrinsics_from_colmap(file: Union[str, Path]) -> Tuple[np.ndarray, List[int], List[str]]:
66
+ """
67
+ Read extrinsics from colmap `images.txt` file.
68
+ Args:
69
+ file: Path to `images.txt` file.
70
+ Returns:
71
+ extrinsics: (N, 4, 4) array of extrinsics.
72
+ camera_ids: List of int, camera ids. Length is N. Note that camera ids in colmap typically start from 1.
73
+ image_names: List of str, image names. Length is N.
74
+ """
75
+ with open(file) as fp:
76
+ lines = fp.readlines()
77
+ image_names, quats, trans, camera_ids = [], [], [], []
78
+ i_line = 0
79
+ for line in lines:
80
+ line = line.strip()
81
+ if line.startswith('#'):
82
+ continue
83
+ i_line += 1
84
+ if i_line % 2 == 0:
85
+ continue
86
+ image_id, qw, qx, qy, qz, tx, ty, tz, camera_id, name = line.split()
87
+ quats.append([float(qx), float(qy), float(qz), float(qw)])
88
+ trans.append([float(tx), float(ty), float(tz)])
89
+ camera_ids.append(int(camera_id))
90
+ image_names.append(name)
91
+
92
+ quats = np.array(quats, dtype=np.float32)
93
+ trans = np.array(trans, dtype=np.float32)
94
+ rotation = Rotation.from_quat(quats).as_matrix()
95
+ extrinsics = np.concatenate([
96
+ np.concatenate([rotation, trans[..., None]], axis=-1),
97
+ np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :].repeat(len(quats), axis=0)
98
+ ], axis=-2)
99
+
100
+ return extrinsics, camera_ids, image_names
101
+
102
+
103
+ def read_intrinsics_from_colmap(file: Union[str, Path], normalize: bool = False) -> Tuple[List[int], np.ndarray, np.ndarray]:
104
+ """
105
+ Read intrinsics from colmap `cameras.txt` file.
106
+ Args:
107
+ file: Path to `cameras.txt` file.
108
+ normalize: Whether to normalize the intrinsics. If True, the intrinsics will be normalized. (mapping coordinates to [0, 1] range)
109
+ Returns:
110
+ camera_ids: List of int, camera ids. Length is N. Note that camera ids in colmap typically start from 1.
111
+ intrinsics: (N, 3, 3) array of intrinsics.
112
+ distortions: (N, 5) array of distortions.
113
+ """
114
+ with open(file) as fp:
115
+ lines = fp.readlines()
116
+ intrinsics, distortions, camera_ids = [], [], []
117
+ for line in lines:
118
+ line = line.strip()
119
+ if not line or line.startswith('#'):
120
+ continue
121
+ camera_id, model, width, height, *params = line.split()
122
+ camera_id, width, height = int(camera_id), int(width), int(height)
123
+ if model == 'PINHOLE':
124
+ fx, fy, cx, cy = map(float, params[:4])
125
+ k1 = k2 = k3 = p1 = p2 = 0.0
126
+ elif model == 'OPENCV':
127
+ fx, fy, cx, cy, k1, k2, p1, p2, k3 = *map(float, params[:8]), 0.0
128
+ elif model == 'SIMPLE_RADIAL':
129
+ f, cx, cy, k = map(float, params[:4])
130
+ fx = fy = f
131
+ k1, k2, p1, p2, k3 = k, 0.0, 0.0, 0.0, 0.0
132
+ camera_ids.append(camera_id)
133
+ if normalize:
134
+ fx, fy, cx, cy = fx / width, fy / height, cx / width, cy / height
135
+ intrinsics.append([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
136
+ distortions.append([k1, k2, p1, p2, k3])
137
+ intrinsics = np.array(intrinsics, dtype=np.float32)
138
+ distortions = np.array(distortions, dtype=np.float32)
139
+ return camera_ids, intrinsics, distortions
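A usage sketch for the COLMAP I/O helpers above (the import path src.utils3d.io.colmap is an assumption based on this repo's layout; note that write_extrinsics_as_colmap emits one pose line per image, whereas a full COLMAP images.txt also carries a POINTS2D line per image):

    import numpy as np
    from src.utils3d.io.colmap import (
        write_intrinsics_as_colmap, read_intrinsics_from_colmap, write_extrinsics_as_colmap,
    )

    # One pinhole camera and two identity world-to-camera poses.
    intrinsics = np.array([[500.0, 0.0, 320.0],
                           [0.0, 500.0, 240.0],
                           [0.0, 0.0, 1.0]], dtype=np.float32)
    extrinsics = np.repeat(np.eye(4, dtype=np.float32)[None], 2, axis=0)

    write_intrinsics_as_colmap('cameras.txt', intrinsics, width=640, height=480)
    write_extrinsics_as_colmap('images.txt', extrinsics, camera_ids=[1, 1])

    camera_ids, K, dist = read_intrinsics_from_colmap('cameras.txt')
    print(camera_ids, K.shape, dist.shape)  # [1] (1, 3, 3) (1, 5)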
src/utils3d/io/obj.py ADDED
@@ -0,0 +1,146 @@
1
+ from io import TextIOWrapper
2
+ from typing import Dict, Any, Union, Iterable
3
+ import numpy as np
4
+ from pathlib import Path
5
+
6
+ __all__ = [
7
+ 'read_obj',
8
+ 'write_obj',
9
+ 'simple_write_obj'
10
+ ]
11
+
12
+ def read_obj(
13
+ file : Union[str, Path, TextIOWrapper],
14
+ encoding: Union[str, None] = None,
15
+ ignore_unknown: bool = False
16
+ ):
17
+ """
18
+ Read wavefront .obj file, without preprocessing.
19
+
20
+ Why bother having this read_obj() when libraries like `trimesh` already exist?
21
+ This function reads the raw .obj content and keeps the original order of vertices and faces,
22
+ whereas trimesh may merge or split vertices, which can break that ordering.
23
+ Libraries like trimesh mainly target geometry processing and rendering and support many formats;
24
+ if you need mesh geometry processing, `trimesh` offers far more features.
25
+
26
+ ### Parameters
27
+ `file` (str, Path, TextIOWrapper): filepath or file object
28
+ encoding (str, optional): text encoding used to open the file. Defaults to None.
29
+
30
+ ### Returns
31
+ obj (dict): A dict containing .obj components
32
+ {
33
+ 'mtllib': [],
34
+ 'v': [[0.1, 0.2, 1.0], [1.2, 0.0, 0.0], ...],
35
+ 'vt': [[0.5, 0.5], ...],
36
+ 'vn': [[0., 0.7, 0.7], [0., -0.7, 0.7], ...],
37
+ 'f': [[0, 1, 2], [2, 3, 4],...],
38
+ 'usemtl': [('mtl1', 7)]
39
+ }
40
+ """
41
+ if hasattr(file,'read'):
42
+ lines = file.read().splitlines()
43
+ else:
44
+ with open(file, 'r', encoding=encoding) as fp:
45
+ lines = fp.read().splitlines()
46
+ mtllib = []
47
+ v, vt, vn, vp = [], [], [], [] # Vertex coordinates, Vertex texture coordinate, Vertex normal, Vertex parameter
48
+ f, ft, fn = [], [], [] # Face indices, Face texture indices, Face normal indices
49
+ o = []
50
+ s = []
51
+ usemtl = []
52
+
53
+ def pad(l: list, n: Any):
54
+ return l + [n] * (3 - len(l))
55
+
56
+ for i, line in enumerate(lines):
57
+ sq = line.strip().split()
58
+ if len(sq) == 0:
59
+ continue
60
+ if sq[0] == 'v':
61
+ assert 4 <= len(sq) <= 5, f'Invalid format of line {i}: {line}'
62
+ v.append([float(e) for e in sq[1:]][:3])
63
+ elif sq[0] == 'vt':
64
+ assert 3 <= len(sq) <= 4, f'Invalid format of line {i}: {line}'
65
+ vt.append([float(e) for e in sq[1:]][:2])
66
+ elif sq[0] == 'vn':
67
+ assert len(sq) == 4, f'Invalid format of line {i}: {line}'
68
+ vn.append([float(e) for e in sq[1:]])
69
+ elif sq[0] == 'vp':
70
+ assert 2 <= len(sq) <= 4, f'Invalid format of line {i}: {line}'
71
+ vp.append(pad([float(e) for e in sq[1:]], 0))
72
+ elif sq[0] == 'f':
73
+ spliting = [pad([int(j) - 1 for j in e.split('/')], -1) for e in sq[1:]]
74
+ f.append([e[0] for e in spliting])
75
+ ft.append([e[1] for e in spliting])
76
+ fn.append([e[2] for e in spliting])
77
+ elif sq[0] == 'usemtl':
78
+ assert len(sq) == 2
79
+ usemtl.append((sq[1], len(f)))
80
+ elif sq[0] == 'o':
81
+ assert len(sq) == 2
82
+ o.append((sq[1], len(f)))
83
+ elif sq[0] == 's':
84
+ s.append((sq[1], len(f)))
85
+ elif sq[0] == 'mtllib':
86
+ assert len(sq) == 2
87
+ mtllib.append(sq[1])
88
+ elif sq[0][0] == '#':
89
+ continue
90
+ else:
91
+ if not ignore_unknown:
92
+ raise Exception(f'Unknown keyword {sq[0]}')
93
+
94
+ min_poly_vertices = min(len(face) for face in f)
95
+ max_poly_vertices = max(len(face) for face in f)
96
+
97
+ return {
98
+ 'mtllib': mtllib,
99
+ 'v': np.array(v, dtype=np.float32),
100
+ 'vt': np.array(vt, dtype=np.float32),
101
+ 'vn': np.array(vn, dtype=np.float32),
102
+ 'vp': np.array(vp, dtype=np.float32),
103
+ 'f': np.array(f, dtype=np.int32) if min_poly_vertices == max_poly_vertices else f,
104
+ 'ft': np.array(ft, dtype=np.int32) if min_poly_vertices == max_poly_vertices else ft,
105
+ 'fn': np.array(fn, dtype=np.int32) if min_poly_vertices == max_poly_vertices else fn,
106
+ 'o': o,
107
+ 's': s,
108
+ 'usemtl': usemtl,
109
+ }
110
+
111
+
112
+ def write_obj(
113
+ file: Union[str, Path],
114
+ obj: Dict[str, Any],
115
+ encoding: Union[str, None] = None
116
+ ):
117
+ with open(file, 'w', encoding=encoding) as fp:
118
+ for k in ['v', 'vt', 'vn', 'vp']:
119
+ if k not in obj:
120
+ continue
121
+ for v in obj[k]:
122
+ print(k, *map(float, v), file=fp)
123
+ for f in obj['f']:
124
+ print('f', *('/'.join(str(int(j) + 1) for j in i) if isinstance(i, Iterable) else str(int(i) + 1) for i in f), file=fp)  # .obj face indices are 1-based
125
+
126
+
127
+ def simple_write_obj(
128
+ file: Union[str, Path],
129
+ vertices: np.ndarray,
130
+ faces: np.ndarray,
131
+ encoding: Union[str, None] = None
132
+ ):
133
+ """
134
+ Write wavefront .obj file, without preprocessing.
135
+
136
+ Args:
137
+ vertices (np.ndarray): [N, 3]
138
+ faces (np.ndarray): [T, 3]
139
+ file (Any): filepath
140
+ encoding (str, optional): text encoding used to open the file. Defaults to None.
141
+ """
142
+ with open(file, 'w', encoding=encoding) as fp:
143
+ for v in vertices:
144
+ print('v', *map(float, v), file=fp)
145
+ for f in faces:
146
+ print('f', *map(int, f + 1), file=fp)
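A round-trip sketch for the .obj helpers above (the import path src.utils3d.io.obj is an assumption based on this repo's layout):

    import numpy as np
    from src.utils3d.io.obj import simple_write_obj, read_obj

    # One triangle; simple_write_obj converts the 0-based indices to the 1-based .obj convention.
    vertices = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
    faces = np.array([[0, 1, 2]])
    simple_write_obj('triangle.obj', vertices, faces)

    # read_obj keeps the raw ordering and converts face indices back to 0-based.
    obj = read_obj('triangle.obj')
    print(obj['v'].shape, obj['f'].tolist())  # (3, 3) [[0, 1, 2]]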
src/utils3d/io/ply.py ADDED
@@ -0,0 +1,104 @@
1
+ import numpy as np
2
+
3
+ from typing import *
4
+ from pathlib import Path
5
+
6
+
7
+ def read_ply(
8
+ file: Union[str, Path],
9
+ encoding: Union[str, None] = None,
10
+ ignore_unknown: bool = False
11
+ ) -> Tuple[np.ndarray, np.ndarray]:
12
+ """
13
+ Read .ply file, without preprocessing.
14
+
15
+ Args:
16
+ file (Any): filepath
17
+ encoding (str, optional):
18
+
19
+ Returns:
20
+ Tuple[np.ndarray, np.ndarray]: vertices, faces
21
+ """
22
+ import plyfile
23
+ plydata = plyfile.PlyData.read(file)
24
+ vertices = np.stack([plydata['vertex'][k] for k in ['x', 'y', 'z']], axis=-1)
25
+ if 'face' in plydata:
26
+ faces = np.array(plydata['face']['vertex_indices'].tolist())
27
+ else:
28
+ faces = None
29
+ return vertices, faces
30
+
31
+
32
+ def write_ply(
33
+ file: Union[str, Path],
34
+ vertices: np.ndarray,
35
+ faces: np.ndarray = None,
36
+ edges: np.ndarray = None,
37
+ vertex_colors: np.ndarray = None,
38
+ edge_colors: np.ndarray = None,
39
+ text: bool = False
40
+ ):
41
+ """
42
+ Write .ply file, without preprocessing.
43
+
44
+ Args:
45
+ file (Any): filepath
46
+ vertices (np.ndarray): [N, 3]
47
+ faces (np.ndarray): [T, P] face indices
48
+ edges (np.ndarray): [E, 2]
49
+ vertex_colors (np.ndarray, optional): [N, 3]. Defaults to None.
50
+ edge_colors (np.ndarray, optional): [E, 3]. Defaults to None.
51
+ text (bool, optional): save data in text format. Defaults to False.
52
+ """
53
+ import plyfile
54
+ assert vertices.ndim == 2 and vertices.shape[1] == 3
55
+ vertices = vertices.astype(np.float32)
56
+ if faces is not None:
57
+ assert faces.ndim == 2
58
+ faces = faces.astype(np.int32)
59
+ if edges is not None:
60
+ assert edges.ndim == 2 and edges.shape[1] == 2
61
+ edges = edges.astype(np.int32)
62
+
63
+ if vertex_colors is not None:
64
+ assert vertex_colors.ndim == 2 and vertex_colors.shape[1] == 3
65
+ if vertex_colors.dtype in [np.float32, np.float64]:
66
+ vertex_colors = vertex_colors * 255
67
+ vertex_colors = np.clip(vertex_colors, 0, 255).astype(np.uint8)
68
+ vertices_data = np.zeros(len(vertices), dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')])
69
+ vertices_data['x'] = vertices[:, 0]
70
+ vertices_data['y'] = vertices[:, 1]
71
+ vertices_data['z'] = vertices[:, 2]
72
+ vertices_data['red'] = vertex_colors[:, 0]
73
+ vertices_data['green'] = vertex_colors[:, 1]
74
+ vertices_data['blue'] = vertex_colors[:, 2]
75
+ else:
76
+ vertices_data = np.array([tuple(v) for v in vertices], dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')])
77
+
78
+ if faces is not None:
79
+ faces_data = np.zeros(len(faces), dtype=[('vertex_indices', 'i4', (faces.shape[1],))])
80
+ faces_data['vertex_indices'] = faces
81
+
82
+ if edges is not None:
83
+ if edge_colors is not None:
84
+ assert edge_colors.ndim == 2 and edge_colors.shape[1] == 3
85
+ if edge_colors.dtype in [np.float32, np.float64]:
86
+ edge_colors = edge_colors * 255
87
+ edge_colors = np.clip(edge_colors, 0, 255).astype(np.uint8)
88
+ edges_data = np.zeros(len(edges), dtype=[('vertex1', 'i4'), ('vertex2', 'i4'), ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')])
89
+ edges_data['vertex1'] = edges[:, 0]
90
+ edges_data['vertex2'] = edges[:, 1]
91
+ edges_data['red'] = edge_colors[:, 0]
92
+ edges_data['green'] = edge_colors[:, 1]
93
+ edges_data['blue'] = edge_colors[:, 2]
94
+ else:
95
+ edges_data = np.array([tuple(e) for e in edges], dtype=[('vertex1', 'i4'), ('vertex2', 'i4')])
96
+
97
+ ply_data = [plyfile.PlyElement.describe(vertices_data, 'vertex')]
98
+ if faces is not None:
99
+ ply_data.append(plyfile.PlyElement.describe(faces_data, 'face'))
100
+ if edges is not None:
101
+ ply_data.append(plyfile.PlyElement.describe(edges_data, 'edge'))
102
+
103
+ plyfile.PlyData(ply_data, text=text).write(file)
104
+
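A usage sketch for write_ply/read_ply above (import path assumed from this repo's layout; the plyfile package must be installed, since both functions import it at call time):

    import numpy as np
    from src.utils3d.io.ply import write_ply, read_ply

    vertices = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
    faces = np.array([[0, 1, 2]])
    colors = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])  # float colors are scaled to 0..255

    write_ply('triangle.ply', vertices, faces=faces, vertex_colors=colors, text=True)
    v, f = read_ply('triangle.ply')
    print(v.shape, f.shape)  # (3, 3) (1, 3)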
src/utils3d/numpy/__init__.py ADDED
@@ -0,0 +1,142 @@
1
+ """
2
+ 3D utility functions working with NumPy.
3
+ """
4
+ import importlib
5
+ import itertools
6
+ import numpy
7
+ from typing import TYPE_CHECKING
8
+
9
+
10
+ __modules_all__ = {
11
+ 'mesh':[
12
+ 'triangulate',
13
+ 'compute_face_normal',
14
+ 'compute_face_angle',
15
+ 'compute_vertex_normal',
16
+ 'compute_vertex_normal_weighted',
17
+ 'remove_corrupted_faces',
18
+ 'merge_duplicate_vertices',
19
+ 'remove_unreferenced_vertices',
20
+ 'subdivide_mesh_simple',
21
+ 'mesh_relations',
22
+ 'flatten_mesh_indices'
23
+ ],
24
+ 'quadmesh': [
25
+ 'calc_quad_candidates',
26
+ 'calc_quad_distortion',
27
+ 'calc_quad_direction',
28
+ 'calc_quad_smoothness',
29
+ 'sovle_quad',
30
+ 'sovle_quad_qp',
31
+ 'tri_to_quad'
32
+ ],
33
+ 'utils': [
34
+ 'sliding_window_1d',
35
+ 'sliding_window_nd',
36
+ 'sliding_window_2d',
37
+ 'max_pool_1d',
38
+ 'max_pool_2d',
39
+ 'max_pool_nd',
40
+ 'depth_edge',
41
+ 'normals_edge',
42
+ 'depth_aliasing',
43
+ 'interpolate',
44
+ 'image_scrcoord',
45
+ 'image_uv',
46
+ 'image_pixel_center',
47
+ 'image_pixel',
48
+ 'image_mesh',
49
+ 'image_mesh_from_depth',
50
+ 'depth_to_normals',
51
+ 'points_to_normals',
52
+ 'chessboard',
53
+ 'cube',
54
+ 'icosahedron',
55
+ 'square',
56
+ 'camera_frustum',
57
+ ],
58
+ 'transforms': [
59
+ 'perspective',
60
+ 'perspective_from_fov',
61
+ 'perspective_from_fov_xy',
62
+ 'intrinsics_from_focal_center',
63
+ 'intrinsics_from_fov',
64
+ 'fov_to_focal',
65
+ 'focal_to_fov',
66
+ 'intrinsics_to_fov',
67
+ 'view_look_at',
68
+ 'extrinsics_look_at',
69
+ 'perspective_to_intrinsics',
70
+ 'perspective_to_near_far',
71
+ 'intrinsics_to_perspective',
72
+ 'extrinsics_to_view',
73
+ 'view_to_extrinsics',
74
+ 'normalize_intrinsics',
75
+ 'crop_intrinsics',
76
+ 'pixel_to_uv',
77
+ 'pixel_to_ndc',
78
+ 'uv_to_pixel',
79
+ 'project_depth',
80
+ 'depth_buffer_to_linear',
81
+ 'unproject_cv',
82
+ 'unproject_gl',
83
+ 'project_cv',
84
+ 'project_gl',
85
+ 'quaternion_to_matrix',
86
+ 'axis_angle_to_matrix',
87
+ 'matrix_to_quaternion',
88
+ 'extrinsics_to_essential',
89
+ 'euler_axis_angle_rotation',
90
+ 'euler_angles_to_matrix',
91
+ 'skew_symmetric',
92
+ 'rotation_matrix_from_vectors',
93
+ 'ray_intersection',
94
+ 'se3_matrix',
95
+ 'slerp_quaternion',
96
+ 'slerp_vector',
97
+ 'lerp',
98
+ 'lerp_se3_matrix',
99
+ 'piecewise_lerp',
100
+ 'piecewise_lerp_se3_matrix',
101
+ 'apply_transform'
102
+ ],
103
+ 'spline': [
104
+ 'linear_spline_interpolate',
105
+ ],
106
+ 'rasterization': [
107
+ 'RastContext',
108
+ 'rasterize_triangle_faces',
109
+ 'rasterize_edges',
110
+ 'texture',
111
+ 'warp_image_by_depth',
112
+ 'test_rasterization'
113
+ ],
114
+ }
115
+
116
+
117
+ __all__ = list(itertools.chain(*__modules_all__.values()))
118
+
119
+ def __getattr__(name):
120
+ try:
121
+ return globals()[name]
122
+ except KeyError:
123
+ pass
124
+
125
+ try:
126
+ module_name = next(m for m in __modules_all__ if name in __modules_all__[m])
127
+ except StopIteration:
128
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
129
+ module = importlib.import_module(f'.{module_name}', __name__)
130
+ for key in __modules_all__[module_name]:
131
+ globals()[key] = getattr(module, key)
132
+
133
+ return globals()[name]
134
+
135
+
136
+ if TYPE_CHECKING:
137
+ from .quadmesh import *
138
+ from .transforms import *
139
+ from .mesh import *
140
+ from .utils import *
141
+ from .rasterization import *
142
+ from .spline import *
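The module-level __getattr__ above makes this subpackage lazy: nothing is imported until one of the listed names is first accessed, at which point the owning submodule is imported and all of its names are cached in the package globals. A small sketch (import path assumed from this repo's layout):

    import numpy as np
    import src.utils3d.numpy as u3d_np   # importing the package does not yet import .mesh, .transforms, ...

    faces = np.array([[0, 1, 2, 3]])     # one quad
    tris = u3d_np.triangulate(faces)     # first access triggers the import of .mesh
    print(tris.tolist())                 # [[0, 1, 2], [0, 2, 3]]
    print('triangulate' in vars(u3d_np)) # True: the name is now cached in the package globals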
src/utils3d/numpy/_helpers.py ADDED
@@ -0,0 +1,93 @@
1
+ # decorator
2
+ import numpy as np
3
+ from numbers import Number
4
+ import inspect
5
+ from functools import wraps
6
+ from typing import *
7
+ from .._helpers import suppress_traceback
8
+
9
+
10
+ def get_args_order(func, args, kwargs):
11
+ """
12
+ Get the order of the arguments of a function.
13
+ """
14
+ names = inspect.getfullargspec(func).args
15
+ names_idx = {name: i for i, name in enumerate(names)}
16
+ args_order = []
17
+ kwargs_order = {}
18
+ for name, arg in kwargs.items():
19
+ if name in names:
20
+ kwargs_order[name] = names_idx[name]
21
+ names.remove(name)
22
+ for i, arg in enumerate(args):
23
+ if i < len(names):
24
+ args_order.append(names_idx[names[i]])
25
+ return args_order, kwargs_order
26
+
27
+
28
+ def broadcast_args(args, kwargs, args_dim, kwargs_dim):
29
+ spatial = []
30
+ for arg, arg_dim in zip(args + list(kwargs.values()), args_dim + list(kwargs_dim.values())):
31
+ if isinstance(arg, np.ndarray) and arg_dim is not None:
32
+ arg_spatial = arg.shape[:arg.ndim-arg_dim]
33
+ if len(arg_spatial) > len(spatial):
34
+ spatial = [1] * (len(arg_spatial) - len(spatial)) + spatial
35
+ for j in range(len(arg_spatial)):
36
+ if spatial[-j] < arg_spatial[-j]:
37
+ if spatial[-j] == 1:
38
+ spatial[-j] = arg_spatial[-j]
39
+ else:
40
+ raise ValueError("Cannot broadcast arguments.")
41
+ for i, arg in enumerate(args):
42
+ if isinstance(arg, np.ndarray) and args_dim[i] is not None:
43
+ args[i] = np.broadcast_to(arg, [*spatial, *arg.shape[arg.ndim-args_dim[i]:]])
44
+ for key, arg in kwargs.items():
45
+ if isinstance(arg, np.ndarray) and kwargs_dim[key] is not None:
46
+ kwargs[key] = np.broadcast_to(arg, [*spatial, *arg.shape[arg.ndim-kwargs_dim[key]:]])
47
+ return args, kwargs, spatial
48
+
49
+
50
+ def batched(*dims):
51
+ """
52
+ Decorator that allows a function to be called with batched arguments.
53
+ """
54
+ def decorator(func):
55
+ @wraps(func)
56
+ @suppress_traceback
57
+ def wrapper(*args, **kwargs):
58
+ args = list(args)
59
+ # get arguments dimensions
60
+ args_order, kwargs_order = get_args_order(func, args, kwargs)
61
+ args_dim = [dims[i] for i in args_order]
62
+ kwargs_dim = {key: dims[i] for key, i in kwargs_order.items()}
63
+ # convert to numpy array
64
+ for i, arg in enumerate(args):
65
+ if isinstance(arg, (Number, list, tuple)) and args_dim[i] is not None:
66
+ args[i] = np.array(arg)
67
+ for key, arg in kwargs.items():
68
+ if isinstance(arg, (Number, list, tuple)) and kwargs_dim[key] is not None:
69
+ kwargs[key] = np.array(arg)
70
+ # broadcast arguments
71
+ args, kwargs, spatial = broadcast_args(args, kwargs, args_dim, kwargs_dim)
72
+ for i, (arg, arg_dim) in enumerate(zip(args, args_dim)):
73
+ if isinstance(arg, np.ndarray) and arg_dim is not None:
74
+ args[i] = arg.reshape([-1, *arg.shape[arg.ndim-arg_dim:]])
75
+ for key, arg in kwargs.items():
76
+ if isinstance(arg, np.ndarray) and kwargs_dim[key] is not None:
77
+ kwargs[key] = arg.reshape([-1, *arg.shape[arg.ndim-kwargs_dim[key]:]])
78
+ # call function
79
+ results = func(*args, **kwargs)
80
+ type_results = type(results)
81
+ results = list(results) if isinstance(results, (tuple, list)) else [results]
82
+ # restore spatial dimensions
83
+ for i, result in enumerate(results):
84
+ results[i] = result.reshape([*spatial, *result.shape[1:]])
85
+ if type_results == tuple:
86
+ results = tuple(results)
87
+ elif type_results == list:
88
+ results = list(results)
89
+ else:
90
+ results = results[0]
91
+ return results
92
+ return wrapper
93
+ return decorator
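The batched decorator above declares, per positional argument, how many trailing "core" dimensions it has (None for non-array arguments); all leading dimensions are broadcast, flattened into a single batch axis before the call, and restored on the outputs. A small sketch (the function row_sums is illustrative; import path assumed from this repo's layout):

    import numpy as np
    from src.utils3d.numpy._helpers import batched

    @batched(2, None)
    def row_sums(points: np.ndarray, scale: float) -> np.ndarray:
        # Inside the wrapper, `points` arrives with its leading dims flattened
        # into one batch axis (here shape (35, 4, 3)).
        return scale * points.sum(axis=-1)

    pts = np.ones((5, 7, 4, 3))   # arbitrary leading (batch) dimensions
    out = row_sums(pts, 2.0)
    print(out.shape)              # (5, 7, 4): leading dimensions are restored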
src/utils3d/numpy/mesh.py ADDED
@@ -0,0 +1,355 @@
1
+ import numpy as np
2
+ from typing import *
3
+ from ._helpers import batched
4
+
5
+
6
+ __all__ = [
7
+ 'triangulate',
8
+ 'compute_face_normal',
9
+ 'compute_face_angle',
10
+ 'compute_vertex_normal',
11
+ 'compute_vertex_normal_weighted',
12
+ 'remove_corrupted_faces',
13
+ 'merge_duplicate_vertices',
14
+ 'remove_unreferenced_vertices',
15
+ 'subdivide_mesh_simple',
16
+ 'mesh_relations',
17
+ 'flatten_mesh_indices'
18
+ ]
19
+
20
+
21
+ def triangulate(
22
+ faces: np.ndarray,
23
+ vertices: np.ndarray = None,
24
+ backslash: np.ndarray = None
25
+ ) -> np.ndarray:
26
+ """
27
+ Triangulate a polygonal mesh.
28
+
29
+ Args:
30
+ faces (np.ndarray): [L, P] polygonal faces
31
+ vertices (np.ndarray, optional): [N, 3] 3-dimensional vertices.
32
+ If given, the triangulation is performed according to the distance
33
+ between vertices. Defaults to None.
34
+ backslash (np.ndarray, optional): [L] boolean array indicating
35
+ how to triangulate the quad faces. Defaults to None.
36
+
37
+ Returns:
38
+ (np.ndarray): [L * (P - 2), 3] triangular faces
39
+ """
40
+ if faces.shape[-1] == 3:
41
+ return faces
42
+ P = faces.shape[-1]
43
+ if vertices is not None:
44
+ assert faces.shape[-1] == 4, "now only support quad mesh"
45
+ if backslash is None:
46
+ backslash = np.linalg.norm(vertices[faces[:, 0]] - vertices[faces[:, 2]], axis=-1) < \
47
+ np.linalg.norm(vertices[faces[:, 1]] - vertices[faces[:, 3]], axis=-1)
48
+ if backslash is None:
49
+ loop_indice = np.stack([
50
+ np.zeros(P - 2, dtype=int),
51
+ np.arange(1, P - 1, 1, dtype=int),
52
+ np.arange(2, P, 1, dtype=int)
53
+ ], axis=1)
54
+ return faces[:, loop_indice].reshape((-1, 3))
55
+ else:
56
+ assert faces.shape[-1] == 4, "now only support quad mesh"
57
+ faces = np.where(
58
+ backslash[:, None],
59
+ faces[:, [0, 1, 2, 0, 2, 3]],
60
+ faces[:, [0, 1, 3, 3, 1, 2]]
61
+ ).reshape((-1, 3))
62
+ return faces
63
+
64
+
65
+ @batched(2, None)
66
+ def compute_face_normal(
67
+ vertices: np.ndarray,
68
+ faces: np.ndarray
69
+ ) -> np.ndarray:
70
+ """
71
+ Compute face normals of a triangular mesh
72
+
73
+ Args:
74
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
75
+ faces (np.ndarray): [T, 3] triangular face indices
76
+
77
+ Returns:
78
+ normals (np.ndarray): [..., T, 3] face normals
79
+ """
80
+ normal = np.cross(
81
+ vertices[..., faces[:, 1], :] - vertices[..., faces[:, 0], :],
82
+ vertices[..., faces[:, 2], :] - vertices[..., faces[:, 0], :]
83
+ )
84
+ normal_norm = np.linalg.norm(normal, axis=-1, keepdims=True)
85
+ normal_norm[normal_norm == 0] = 1
86
+ normal /= normal_norm
87
+ return normal
88
+
89
+
90
+ @batched(2, None)
91
+ def compute_face_angle(
92
+ vertices: np.ndarray,
93
+ faces: np.ndarray,
94
+ eps: float = 1e-12
95
+ ) -> np.ndarray:
96
+ """
97
+ Compute face angles of a triangular mesh
98
+
99
+ Args:
100
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
101
+ faces (np.ndarray): [T, 3] triangular face indices
102
+
103
+ Returns:
104
+ angles (np.ndarray): [..., T, 3] face angles
105
+ """
106
+ face_angle = np.zeros_like(faces, dtype=vertices.dtype)
107
+ for i in range(3):
108
+ edge1 = vertices[..., faces[:, (i + 1) % 3], :] - vertices[..., faces[:, i], :]
109
+ edge2 = vertices[..., faces[:, (i + 2) % 3], :] - vertices[..., faces[:, i], :]
110
+ face_angle[..., i] = np.arccos(np.sum(
111
+ edge1 / np.clip(np.linalg.norm(edge1, axis=-1, keepdims=True), eps, None) *
112
+ edge2 / np.clip(np.linalg.norm(edge2, axis=-1, keepdims=True), eps, None),
113
+ axis=-1
114
+ ))
115
+ return face_angle
116
+
117
+
118
+ @batched(2, None, 2)
119
+ def compute_vertex_normal(
120
+ vertices: np.ndarray,
121
+ faces: np.ndarray,
122
+ face_normal: np.ndarray = None
123
+ ) -> np.ndarray:
124
+ """
125
+ Compute vertex normals of a triangular mesh by averaging neighboring face normals
126
+ TODO: can be improved.
127
+
128
+ Args:
129
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
130
+ faces (np.ndarray): [T, 3] triangular face indices
131
+ face_normal (np.ndarray, optional): [..., T, 3] face normals.
132
+ None to compute face normals from vertices and faces. Defaults to None.
133
+
134
+ Returns:
135
+ normals (np.ndarray): [..., N, 3] vertex normals
136
+ """
137
+ if face_normal is None:
138
+ face_normal = compute_face_normal(vertices, faces)
139
+ vertex_normal = np.zeros_like(vertices, dtype=vertices.dtype)
140
+ for n in range(vertices.shape[0]):
141
+ for i in range(3):
142
+ vertex_normal[n, :, 0] += np.bincount(faces[:, i], weights=face_normal[n, :, 0], minlength=vertices.shape[1])
143
+ vertex_normal[n, :, 1] += np.bincount(faces[:, i], weights=face_normal[n, :, 1], minlength=vertices.shape[1])
144
+ vertex_normal[n, :, 2] += np.bincount(faces[:, i], weights=face_normal[n, :, 2], minlength=vertices.shape[1])
145
+ vertex_normal_norm = np.linalg.norm(vertex_normal, axis=-1, keepdims=True)
146
+ vertex_normal_norm[vertex_normal_norm == 0] = 1
147
+ vertex_normal /= vertex_normal_norm
148
+ return vertex_normal
149
+
150
+
151
+ @batched(2, None, 2)
152
+ def compute_vertex_normal_weighted(
153
+ vertices: np.ndarray,
154
+ faces: np.ndarray,
155
+ face_normal: np.ndarray = None
156
+ ) -> np.ndarray:
157
+ """
158
+ Compute vertex normals of a triangular mesh by weighted sum of neighboring face normals
159
+ according to the angles
160
+
161
+ Args:
162
+ vertices (np.ndarray): [..., N, 3] 3-dimensional vertices
163
+ faces (np.ndarray): [..., T, 3] triangular face indices
164
+ face_normal (np.ndarray, optional): [..., T, 3] face normals.
165
+ None to compute face normals from vertices and faces. Defaults to None.
166
+
167
+ Returns:
168
+ normals (np.ndarray): [..., N, 3] vertex normals
169
+ """
170
+ if face_normal is None:
171
+ face_normal = compute_face_normal(vertices, faces)
172
+ face_angle = compute_face_angle(vertices, faces)
173
+ vertex_normal = np.zeros_like(vertices)
174
+ for n in range(vertices.shape[0]):
175
+ for i in range(3):
176
+ vertex_normal[n, :, 0] += np.bincount(faces[n, :, i], weights=face_normal[n, :, 0] * face_angle[n, :, i], minlength=vertices.shape[1])
177
+ vertex_normal[n, :, 1] += np.bincount(faces[n, :, i], weights=face_normal[n, :, 1] * face_angle[n, :, i], minlength=vertices.shape[1])
178
+ vertex_normal[n, :, 2] += np.bincount(faces[n, :, i], weights=face_normal[n, :, 2] * face_angle[n, :, i], minlength=vertices.shape[1])
179
+ vertex_normal_norm = np.linalg.norm(vertex_normal, axis=-1, keepdims=True)
180
+ vertex_normal_norm[vertex_normal_norm == 0] = 1
181
+ vertex_normal /= vertex_normal_norm
182
+ return vertex_normal
183
+
184
+
185
+ def remove_corrupted_faces(
186
+ faces: np.ndarray
187
+ ) -> np.ndarray:
188
+ """
189
+ Remove corrupted faces (faces with duplicated vertices)
190
+
191
+ Args:
192
+ faces (np.ndarray): [T, 3] triangular face indices
193
+
194
+ Returns:
195
+ np.ndarray: [T_, 3] triangular face indices
196
+ """
197
+ corrupted = (faces[:, 0] == faces[:, 1]) | (faces[:, 1] == faces[:, 2]) | (faces[:, 2] == faces[:, 0])
198
+ return faces[~corrupted]
199
+
200
+
201
+ def merge_duplicate_vertices(
202
+ vertices: np.ndarray,
203
+ faces: np.ndarray,
204
+ tol: float = 1e-6
205
+ ) -> Tuple[np.ndarray, np.ndarray]:
206
+ """
207
+ Merge duplicate vertices of a triangular mesh.
208
+ Duplicate vertices are merged by selecting one of them, and the face indices are updated accordingly.
209
+
210
+ Args:
211
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
212
+ faces (np.ndarray): [T, 3] triangular face indices
213
+ tol (float, optional): tolerance for merging. Defaults to 1e-6.
214
+
215
+ Returns:
216
+ vertices (np.ndarray): [N_, 3] 3-dimensional vertices
217
+ faces (np.ndarray): [T, 3] triangular face indices
218
+ """
219
+ vertices_round = np.round(vertices / tol)
220
+ _, uni_i, uni_inv = np.unique(vertices_round, return_index=True, return_inverse=True, axis=0)
221
+ vertices = vertices[uni_i]
222
+ faces = uni_inv[faces]
223
+ return vertices, faces
224
+
225
+
226
+ def remove_unreferenced_vertices(
227
+ faces: np.ndarray,
228
+ *vertice_attrs,
229
+ return_indices: bool = False
230
+ ) -> Tuple[np.ndarray, ...]:
231
+ """
232
+ Remove unreferenced vertices of a mesh.
233
+ Unreferenced vertices are removed, and the face indices are updated accordingly.
234
+
235
+ Args:
236
+ faces (np.ndarray): [T, P] face indices
237
+ *vertice_attrs: vertex attributes
238
+
239
+ Returns:
240
+ faces (np.ndarray): [T, P] face indices
241
+ *vertice_attrs: vertex attributes
242
+ indices (np.ndarray, optional): [N] indices of vertices that are kept. Defaults to None.
243
+ """
244
+ P = faces.shape[-1]
245
+ fewer_indices, inv_map = np.unique(faces, return_inverse=True)
246
+ faces = inv_map.astype(np.int32).reshape(-1, P)
247
+ ret = [faces]
248
+ for attr in vertice_attrs:
249
+ ret.append(attr[fewer_indices])
250
+ if return_indices:
251
+ ret.append(fewer_indices)
252
+ return tuple(ret)
253
+
254
+
255
+ def subdivide_mesh_simple(
256
+ vertices: np.ndarray,
257
+ faces: np.ndarray,
258
+ n: int = 1
259
+ ) -> Tuple[np.ndarray, np.ndarray]:
260
+ """
261
+ Subdivide a triangular mesh by splitting each triangle into 4 smaller triangles.
262
+ NOTE: All original vertices are kept, and new vertices are appended to the end of the vertex list.
263
+
264
+ Args:
265
+ vertices (np.ndarray): [N, 3] 3-dimensional vertices
266
+ faces (np.ndarray): [T, 3] triangular face indices
267
+ n (int, optional): number of subdivisions. Defaults to 1.
268
+
269
+ Returns:
270
+ vertices (np.ndarray): [N_, 3] subdivided 3-dimensional vertices
271
+ faces (np.ndarray): [4 * T, 3] subdivided triangular face indices
272
+ """
273
+ for _ in range(n):
274
+ edges = np.stack([faces[:, [0, 1]], faces[:, [1, 2]], faces[:, [2, 0]]], axis=0)
275
+ edges = np.sort(edges, axis=2)
276
+ uni_edges, uni_inv = np.unique(edges.reshape(-1, 2), return_inverse=True, axis=0)
277
+ uni_inv = uni_inv.reshape(3, -1)
278
+ midpoints = (vertices[uni_edges[:, 0]] + vertices[uni_edges[:, 1]]) / 2
279
+
280
+ n_vertices = vertices.shape[0]
281
+ vertices = np.concatenate([vertices, midpoints], axis=0)
282
+ faces = np.concatenate([
283
+ np.stack([faces[:, 0], n_vertices + uni_inv[0], n_vertices + uni_inv[2]], axis=1),
284
+ np.stack([faces[:, 1], n_vertices + uni_inv[1], n_vertices + uni_inv[0]], axis=1),
285
+ np.stack([faces[:, 2], n_vertices + uni_inv[2], n_vertices + uni_inv[1]], axis=1),
286
+ np.stack([n_vertices + uni_inv[0], n_vertices + uni_inv[1], n_vertices + uni_inv[2]], axis=1),
287
+ ], axis=0)
288
+ return vertices, faces
289
+
290
+
291
+ def mesh_relations(
292
+ faces: np.ndarray,
293
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
294
+ """
295
+ Calculate the relation between vertices and faces.
296
+ NOTE: The input mesh must be a manifold triangle mesh.
297
+
298
+ Args:
299
+ faces (np.ndarray): [T, 3] triangular face indices
300
+
301
+ Returns:
302
+ edges (np.ndarray): [E, 2] edge indices
303
+ edge2face (np.ndarray): [E, 2] edge to face relation. The second column is -1 if the edge is boundary.
304
+ face2edge (np.ndarray): [T, 3] face to edge relation
305
+ face2face (np.ndarray): [T, 3] face to face relation
306
+ """
307
+ T = faces.shape[0]
308
+ edges = np.stack([faces[:, [0, 1]], faces[:, [1, 2]], faces[:, [2, 0]]], axis=1).reshape(-1, 2) # [3T, 2]
309
+ edges = np.sort(edges, axis=1) # [3T, 2]
310
+ edges, face2edge, occurence = np.unique(edges, axis=0, return_inverse=True, return_counts=True) # [E, 2], [3T], [E]
311
+ E = edges.shape[0]
312
+ assert np.all(occurence <= 2), "The input mesh is not a manifold mesh."
313
+
314
+ # Edge to face relation
315
+ padding = np.arange(E, dtype=np.int32)[occurence == 1]
316
+ padded_face2edge = np.concatenate([face2edge, padding], axis=0) # [2E]
317
+ edge2face = np.argsort(padded_face2edge, kind='stable').reshape(-1, 2) // 3 # [E, 2]
318
+ edge2face_valid = edge2face[:, 1] < T # [E]
319
+ edge2face[~edge2face_valid, 1] = -1
320
+
321
+ # Face to edge relation
322
+ face2edge = face2edge.reshape(-1, 3) # [T, 3]
323
+
324
+ # Face to face relation
325
+ face2face = edge2face[face2edge] # [T, 3, 2]
326
+ face2face = face2face[face2face != np.arange(T)[:, None, None]].reshape(T, 3) # [T, 3]
327
+
328
+ return edges, edge2face, face2edge, face2face
329
+
330
+
331
+ @overload
332
+ def flatten_mesh_indices(faces1: np.ndarray, attr1: np.ndarray, *other_faces_attrs_pairs: np.ndarray) -> Tuple[np.ndarray, ...]:
333
+ """
334
+ Rearrange the indices of a mesh to a flattened version. Vertices will be no longer shared.
335
+
336
+ ### Parameters:
337
+ - `faces1`: [T, P] face indices of the first attribute
338
+ - `attr1`: [N1, ...] attributes of the first mesh
339
+ - ...
340
+
341
+ ### Returns:
342
+ - `faces`: [T, P] flattened face indices, contiguous from 0 to T * P - 1
343
+ - `attr1`: [T * P, ...] attributes of the first mesh, where every P values correspond to a face
344
+ - ...
345
+ """
346
+ def flatten_mesh_indices(*args: np.ndarray) -> Tuple[np.ndarray, ...]:
347
+ assert len(args) % 2 == 0, "The number of arguments must be even."
348
+ T, P = args[0].shape
349
+ assert all(arg.shape[0] == T and arg.shape[1] == P for arg in args[::2]), "The faces must have the same shape."
350
+ attr_flat = []
351
+ for faces_, attr_ in zip(args[::2], args[1::2]):
352
+ attr_flat_ = attr_[faces_].reshape(-1, *attr_.shape[1:])
353
+ attr_flat.append(attr_flat_)
354
+ faces_flat = np.arange(T * P, dtype=np.int32).reshape(T, P)
355
+ return faces_flat, *attr_flat
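A usage sketch for the mesh helpers above (import path assumed from this repo's layout):

    import numpy as np
    from src.utils3d.numpy.mesh import triangulate, compute_face_normal, compute_vertex_normal

    # A unit quad in the z = 0 plane.
    vertices = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)
    quad = np.array([[0, 1, 2, 3]])

    tris = triangulate(quad, vertices=vertices)   # -> (2, 3) triangle indices
    face_n = compute_face_normal(vertices, tris)  # both normals are (0, 0, 1)
    vert_n = compute_vertex_normal(vertices, tris)
    print(tris.shape, face_n.tolist(), vert_n.shape)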
src/utils3d/numpy/quadmesh.py ADDED
@@ -0,0 +1,472 @@
1
+ import numpy as np
2
+ import scipy as sp
3
+ import scipy.optimize as spopt
4
+ from typing import *
5
+
6
+
7
+ __all__ = [
8
+ 'calc_quad_candidates',
9
+ 'calc_quad_distortion',
10
+ 'calc_quad_direction',
11
+ 'calc_quad_smoothness',
12
+ 'sovle_quad',
13
+ 'sovle_quad_qp',
14
+ 'tri_to_quad'
15
+ ]
16
+
17
+
18
+ def calc_quad_candidates(
19
+ edges: np.ndarray,
20
+ face2edge: np.ndarray,
21
+ edge2face: np.ndarray,
22
+ ):
23
+ """
24
+ Calculate the candidate quad faces.
25
+
26
+ Args:
27
+ edges (np.ndarray): [E, 2] edge indices
28
+ face2edge (np.ndarray): [T, 3] face to edge relation
29
+ edge2face (np.ndarray): [E, 2] edge to face relation
30
+
31
+ Returns:
32
+ quads (np.ndarray): [Q, 4] quad candidate indices
33
+ quad2edge (np.ndarray): [Q, 4] edge to quad candidate relation
34
+ quad2adj (np.ndarray): [Q, 8] adjacent quad candidates of each quad candidate
35
+ quads_valid (np.ndarray): [E] whether the quad corresponding to the edge is valid
36
+ """
37
+ E = edges.shape[0]
38
+ T = face2edge.shape[0]
39
+
40
+ quads_valid = edge2face[:, 1] != -1
41
+ Q = quads_valid.sum()
42
+ quad2face = edge2face[quads_valid] # [Q, 2]
43
+ quad2edge = face2edge[quad2face] # [Q, 2, 3]
44
+ flag = quad2edge == np.arange(E)[quads_valid][:, None, None] # [Q, 2, 3]
45
+ flag = flag.argmax(axis=-1) # [Q, 2]
46
+ quad2edge = np.stack([
47
+ quad2edge[np.arange(Q)[:, None], np.arange(2)[None, :], (flag + 1) % 3],
48
+ quad2edge[np.arange(Q)[:, None], np.arange(2)[None, :], (flag + 2) % 3],
49
+ ], axis=-1).reshape(Q, 4) # [Q, 4]
50
+
51
+ quads = np.concatenate([
52
+ np.where(
53
+ (edges[quad2edge[:, 0:1], 1:] == edges[quad2edge[:, 1:2], :]).any(axis=-1),
54
+ edges[quad2edge[:, 0:1], [[0, 1]]],
55
+ edges[quad2edge[:, 0:1], [[1, 0]]],
56
+ ),
57
+ np.where(
58
+ (edges[quad2edge[:, 2:3], 1:] == edges[quad2edge[:, 3:4], :]).any(axis=-1),
59
+ edges[quad2edge[:, 2:3], [[0, 1]]],
60
+ edges[quad2edge[:, 2:3], [[1, 0]]],
61
+ ),
62
+ ], axis=1) # [Q, 4]
63
+
64
+ quad2adj = edge2face[quad2edge] # [Q, 4, 2]
65
+ quad2adj = quad2adj[quad2adj != quad2face[:, [0,0,1,1], None]].reshape(Q, 4) # [Q, 4]
66
+ quad2adj_valid = quad2adj != -1
67
+ quad2adj = face2edge[quad2adj] # [Q, 4, 3]
68
+ quad2adj[~quad2adj_valid, 0] = quad2edge[~quad2adj_valid]
69
+ quad2adj[~quad2adj_valid, 1:] = -1
70
+ quad2adj = quad2adj[quad2adj != quad2edge[..., None]].reshape(Q, 8) # [Q, 8]
71
+ edge_valid = -np.ones(E, dtype=np.int32)
72
+ edge_valid[quads_valid] = np.arange(Q)
73
+ quad2adj_valid = quad2adj != -1
74
+ quad2adj[quad2adj_valid] = edge_valid[quad2adj[quad2adj_valid]] # [Q, 8]
75
+
76
+ return quads, quad2edge, quad2adj, quads_valid
77
+
78
+
79
+ def calc_quad_distortion(
+     vertices: np.ndarray,
+     quads: np.ndarray,
+ ):
+     """
+     Calculate the distortion of each candidate quad face.
+
+     Args:
+         vertices (np.ndarray): [N, 3] 3-dimensional vertices
+         quads (np.ndarray): [Q, 4] quad face indices
+
+     Returns:
+         distortion (np.ndarray): [Q] distortion of each quad face
+     """
+     edge0 = vertices[quads[:, 1]] - vertices[quads[:, 0]] # [Q, 3]
+     edge1 = vertices[quads[:, 2]] - vertices[quads[:, 1]] # [Q, 3]
+     edge2 = vertices[quads[:, 3]] - vertices[quads[:, 2]] # [Q, 3]
+     edge3 = vertices[quads[:, 0]] - vertices[quads[:, 3]] # [Q, 3]
+     cross = vertices[quads[:, 0]] - vertices[quads[:, 2]] # [Q, 3]
+
+     len0 = np.maximum(np.linalg.norm(edge0, axis=-1), 1e-10) # [Q]
+     len1 = np.maximum(np.linalg.norm(edge1, axis=-1), 1e-10) # [Q]
+     len2 = np.maximum(np.linalg.norm(edge2, axis=-1), 1e-10) # [Q]
+     len3 = np.maximum(np.linalg.norm(edge3, axis=-1), 1e-10) # [Q]
+     len_cross = np.maximum(np.linalg.norm(cross, axis=-1), 1e-10) # [Q]
+
+     angle0 = np.arccos(np.clip(np.sum(-edge0 * edge1, axis=-1) / (len0 * len1), -1, 1)) # [Q]
+     angle1 = np.arccos(np.clip(np.sum(-edge1 * cross, axis=-1) / (len1 * len_cross), -1, 1)) \
+         + np.arccos(np.clip(np.sum(cross * edge2, axis=-1) / (len_cross * len2), -1, 1)) # [Q]
+     angle2 = np.arccos(np.clip(np.sum(-edge2 * edge3, axis=-1) / (len2 * len3), -1, 1)) # [Q]
+     angle3 = np.arccos(np.clip(np.sum(-edge3 * -cross, axis=-1) / (len3 * len_cross), -1, 1)) \
+         + np.arccos(np.clip(np.sum(-cross * edge0, axis=-1) / (len_cross * len0), -1, 1)) # [Q]
+
+     normal0 = np.cross(edge0, edge1) # [Q, 3]
+     normal1 = np.cross(edge2, edge3) # [Q, 3]
+     normal0 = normal0 / np.maximum(np.linalg.norm(normal0, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+     normal1 = normal1 / np.maximum(np.linalg.norm(normal1, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+     angle_normal = np.arccos(np.clip(np.sum(normal0 * normal1, axis=-1), -1, 1)) # [Q]
+
+     D90 = np.pi / 2
+     D180 = np.pi
+     D360 = np.pi * 2
+     ang_eng = (np.abs(angle0 - D90)**2 + np.abs(angle1 - D90)**2 + np.abs(angle2 - D90)**2 + np.abs(angle3 - D90)**2) / 4 # [Q]
+     dist_eng = np.abs(angle0 - angle2)**2 / np.minimum(np.maximum(np.minimum(angle0, angle2), 1e-10), np.maximum(D180 - np.maximum(angle0, angle2), 1e-10)) \
+         + np.abs(angle1 - angle3)**2 / np.minimum(np.maximum(np.minimum(angle1, angle3), 1e-10), np.maximum(D180 - np.maximum(angle1, angle3), 1e-10)) # [Q]
+     plane_eng = np.where(angle_normal < D90/2, np.abs(angle_normal)**2, 1e10) # [Q]
+     eng = ang_eng + 2 * dist_eng + 2 * plane_eng # [Q]
+
+     return eng
+
+
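A quick, self-contained sanity check of the distortion energy above (illustrative only, not part of the committed file): a planar unit square should score near zero, while a strongly skewed quad scores much higher. The import path assumes `utils3d` (under `src/` in this repo) is on `PYTHONPATH`.

import numpy as np
from utils3d.numpy import quadmesh  # assumed import path

vertices = np.array([
    [0.0, 0.0, 0.0],
    [1.0, 0.0, 0.0],
    [1.0, 1.0, 0.0],
    [0.0, 1.0, 0.0],  # corners of a unit square
    [3.0, 0.2, 0.0],  # far-away point used to skew the second quad
])
quads = np.array([[0, 1, 2, 3], [0, 1, 4, 3]])
print(quadmesh.calc_quad_distortion(vertices, quads))
# first entry ~0 (ideal square), second entry much larger (angles far from 90 degrees)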
+ def calc_quad_direction(
+     vertices: np.ndarray,
+     quads: np.ndarray,
+ ):
+     """
+     Calculate the direction of each candidate quad face.
+
+     Args:
+         vertices (np.ndarray): [N, 3] 3-dimensional vertices
+         quads (np.ndarray): [Q, 4] quad face indices
+
+     Returns:
+         direction (np.ndarray): [Q, 4] direction of each quad face.
+             Represented by the angle between the crossing and each edge.
+     """
+     mid0 = (vertices[quads[:, 0]] + vertices[quads[:, 1]]) / 2 # [Q, 3]
+     mid1 = (vertices[quads[:, 1]] + vertices[quads[:, 2]]) / 2 # [Q, 3]
+     mid2 = (vertices[quads[:, 2]] + vertices[quads[:, 3]]) / 2 # [Q, 3]
+     mid3 = (vertices[quads[:, 3]] + vertices[quads[:, 0]]) / 2 # [Q, 3]
+
+     cross0 = mid2 - mid0 # [Q, 3]
+     cross1 = mid3 - mid1 # [Q, 3]
+     cross0 = cross0 / np.maximum(np.linalg.norm(cross0, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+     cross1 = cross1 / np.maximum(np.linalg.norm(cross1, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+
+     edge0 = vertices[quads[:, 1]] - vertices[quads[:, 0]] # [Q, 3]
+     edge1 = vertices[quads[:, 2]] - vertices[quads[:, 1]] # [Q, 3]
+     edge2 = vertices[quads[:, 3]] - vertices[quads[:, 2]] # [Q, 3]
+     edge3 = vertices[quads[:, 0]] - vertices[quads[:, 3]] # [Q, 3]
+     edge0 = edge0 / np.maximum(np.linalg.norm(edge0, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+     edge1 = edge1 / np.maximum(np.linalg.norm(edge1, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+     edge2 = edge2 / np.maximum(np.linalg.norm(edge2, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+     edge3 = edge3 / np.maximum(np.linalg.norm(edge3, axis=-1, keepdims=True), 1e-10) # [Q, 3]
+
+     direction = np.stack([
+         np.arccos(np.clip(np.sum(cross0 * edge0, axis=-1), -1, 1)),
+         np.arccos(np.clip(np.sum(cross1 * edge1, axis=-1), -1, 1)),
+         np.arccos(np.clip(np.sum(-cross0 * edge2, axis=-1), -1, 1)),
+         np.arccos(np.clip(np.sum(-cross1 * edge3, axis=-1), -1, 1)),
+     ], axis=-1) # [Q, 4]
+
+     return direction
+
+
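A small illustration of what `calc_quad_direction` returns (again illustrative, not part of the diff): for a planar unit square, each midpoint connector is perpendicular to the two edges it crosses, so all four direction angles come out as pi/2.

import numpy as np
from utils3d.numpy import quadmesh  # assumed import path

square = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 1.0, 0.0], [0.0, 1.0, 0.0]])
print(quadmesh.calc_quad_direction(square, np.array([[0, 1, 2, 3]])))
# -> approximately [[1.5708, 1.5708, 1.5708, 1.5708]]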
+ def calc_quad_smoothness(
+     quad2edge: np.ndarray,
+     quad2adj: np.ndarray,
+     quads_direction: np.ndarray,
+ ):
+     """
+     Calculate the smoothness of each candidate quad face connection.
+
+     Args:
+         quad2edge (np.ndarray): [Q, 4] edge indices of each quad face
+         quad2adj (np.ndarray): [Q, 8] adjacent quad faces of each quad face
+         quads_direction (np.ndarray): [Q, 4] direction of each quad face
+
+     Returns:
+         smoothness (np.ndarray): [Q, 8] smoothness of each quad face connection
+     """
+     Q = quad2adj.shape[0]
+     quad2adj_valid = quad2adj != -1
+     connections = np.stack([
+         np.arange(Q)[:, None].repeat(8, axis=1),
+         quad2adj,
+     ], axis=-1)[quad2adj_valid] # [C, 2]
+     shared_edge_idx_0 = np.array([[0, 0, 1, 1, 2, 2, 3, 3]]).repeat(Q, axis=0)[quad2adj_valid] # [C]
+     shared_edge_idx_1 = np.argmax(quad2edge[quad2adj][quad2adj_valid] == quad2edge[connections[:, 0], shared_edge_idx_0][:, None], axis=-1) # [C]
+     valid_smoothness = np.abs(quads_direction[connections[:, 0], shared_edge_idx_0] - quads_direction[connections[:, 1], shared_edge_idx_1])**2 # [C]
+     smoothness = np.zeros([Q, 8], dtype=np.float32)
+     smoothness[quad2adj_valid] = valid_smoothness
+     return smoothness
+
+
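A minimal toy input for `calc_quad_smoothness` (illustrative only; the index arrays are hand-built here, whereas in practice they come from `calc_quad_candidates`): two candidate quads meet across edge 0, and the connection smoothness is the squared difference between the direction angles each quad assigns to that shared edge.

import numpy as np
from utils3d.numpy import quadmesh  # assumed import path

quad2edge = np.array([[0, 1, 2, 3],
                      [0, 4, 5, 6]])              # [Q, 4] edge indices of each quad
quad2adj = np.full((2, 8), -1)
quad2adj[0, 0] = 1                                # quad 0 meets quad 1 across its edge slot 0
quad2adj[1, 0] = 0
quads_direction = np.array([[0.8, 1.6, 1.6, 1.6],
                            [1.0, 1.6, 1.6, 1.6]])  # [Q, 4] direction angles (radians)
s = quadmesh.calc_quad_smoothness(quad2edge, quad2adj, quads_direction)
print(s[0, 0], s[1, 0])  # both ~ (0.8 - 1.0)**2 = 0.04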
+ def sovle_quad(
+     face2edge: np.ndarray,
+     edge2face: np.ndarray,
+     quad2adj: np.ndarray,
+     quads_distortion: np.ndarray,
+     quads_smoothness: np.ndarray,
+     quads_valid: np.ndarray,
+ ):
+     """
+     Solve the quad mesh from the candidate quad faces.
+
+     Args:
+         face2edge (np.ndarray): [T, 3] face to edge relation
+         edge2face (np.ndarray): [E, 2] edge to face relation
+         quad2adj (np.ndarray): [Q, 8] adjacent quad faces of each quad face
+         quads_distortion (np.ndarray): [Q] distortion of each quad face
+         quads_smoothness (np.ndarray): [Q, 8] smoothness of each quad face connection
+         quads_valid (np.ndarray): [E] whether the quad corresponding to the edge is valid
+
+     Returns:
+         quads_weight (np.ndarray): [Q] weight of each valid quad face
+         conn_min_weight (np.ndarray): [C] minimum weight of each quad face connection
+         conn_max_weight (np.ndarray): [C] maximum weight of each quad face connection
+     """
+     T = face2edge.shape[0]
+     E = edge2face.shape[0]
+     Q = quads_distortion.shape[0]
+     edge_valid = -np.ones(E, dtype=np.int32)
+     edge_valid[quads_valid] = np.arange(Q)
+
+     quads_connection = np.stack([
+         np.arange(Q)[:, None].repeat(8, axis=1),
+         quad2adj,
+     ], axis=-1)[quad2adj != -1] # [C, 2]
+     quads_connection = np.sort(quads_connection, axis=-1) # [C, 2]
+     quads_connection, quads_connection_idx = np.unique(quads_connection, axis=0, return_index=True) # [C, 2], [C]
+     quads_smoothness = quads_smoothness[quad2adj != -1] # [C]
+     quads_smoothness = quads_smoothness[quads_connection_idx] # [C]
+     C = quads_connection.shape[0]
+
+     # Construct the linear programming problem
+
+     # Variables:
+     #   quads_weight: [Q] weight of each quad face
+     #   conn_min_weight: [C] minimum weight of each quad face connection
+     #   conn_max_weight: [C] maximum weight of each quad face connection
+     # Objective:
+     #   minimize (quads_distortion - 3) @ quads_weight
+     #       + (4 * quads_smoothness - 2) @ conn_min_weight
+     #       + 4 * quads_smoothness @ conn_max_weight
+
+     c = np.concatenate([
+         quads_distortion - 3,
+         quads_smoothness*4 - 2,
+         quads_smoothness*4,
+     ], axis=0) # [Q+2C]
+
+     A_ub_triplet = np.concatenate([
+         np.stack([np.arange(T), edge_valid[face2edge[:, 0]], np.ones(T)], axis=1), # [T, 3]
+         np.stack([np.arange(T), edge_valid[face2edge[:, 1]], np.ones(T)], axis=1), # [T, 3]
+         np.stack([np.arange(T), edge_valid[face2edge[:, 2]], np.ones(T)], axis=1), # [T, 3]
+         np.stack([np.arange(T, T+C), np.arange(Q, Q+C), np.ones(C)], axis=1), # [C, 3]
+         np.stack([np.arange(T, T+C), quads_connection[:, 0], -np.ones(C)], axis=1), # [C, 3]
+         np.stack([np.arange(T, T+C), quads_connection[:, 1], -np.ones(C)], axis=1), # [C, 3]
+         np.stack([np.arange(T+C, T+2*C), np.arange(Q+C, Q+2*C), -np.ones(C)], axis=1), # [C, 3]
+         np.stack([np.arange(T+C, T+2*C), quads_connection[:, 0], np.ones(C)], axis=1), # [C, 3]
+         np.stack([np.arange(T+C, T+2*C), quads_connection[:, 1], np.ones(C)], axis=1), # [C, 3]
+     ], axis=0) # [3T+6C, 3]
+     A_ub_triplet = A_ub_triplet[A_ub_triplet[:, 1] != -1] # [3T'+6C, 3]
+     A_ub = sp.sparse.coo_matrix((A_ub_triplet[:, 2], (A_ub_triplet[:, 0], A_ub_triplet[:, 1])), shape=[T+2*C, Q+2*C]) # [T+2C, Q+2C]
+     b_ub = np.concatenate([np.ones(T), -np.ones(C), np.ones(C)], axis=0) # [T+2C]
+     bound = np.stack([
+         np.concatenate([np.zeros(Q), -np.ones(C), np.zeros(C)], axis=0),
+         np.concatenate([np.ones(Q), np.ones(C), np.ones(C)], axis=0),
+     ], axis=1) # [Q+2C, 2]
+     A_eq = None
+     b_eq = None
+
+     print('Solver statistics:')
+     print(f' #T = {T}')
+     print(f' #Q = {Q}')
+     print(f' #C = {C}')
+
+     # Solve the linear programming problem
+     last_num_valid = 0
+     for i in range(100):
+         res_ = spopt.linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bound)
+         if not res_.success:
+             print(f' Iter {i} | Failed with {res_.message}')
+             break
+         res = res_
+         weights = res.x[:Q]
+         valid = (weights > 0.5)
+         num_valid = valid.sum()
+         print(f' Iter {i} | #Q_valid = {num_valid}')
+         if num_valid == last_num_valid:
+             break
+         last_num_valid = num_valid
+         A_eq_triplet = np.stack([
+             np.arange(num_valid),
+             np.arange(Q)[valid],
+             np.ones(num_valid),
+         ], axis=1) # [num_valid, 3]
+         A_eq = sp.sparse.coo_matrix((A_eq_triplet[:, 2], (A_eq_triplet[:, 0], A_eq_triplet[:, 1])), shape=[num_valid, Q+2*C]) # [num_valid, Q+2C]
+         b_eq = np.where(weights[valid] > 0.5, 1, 0) # [num_valid]
+
+     # Return the result
+     quads_weight = res.x[:Q]
+     conn_min_weight = res.x[Q:Q+C]
+     conn_max_weight = res.x[Q+C:Q+2*C]
+     return quads_weight, conn_min_weight, conn_max_weight
+
+
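A possible post-check of the LP solution above (a sketch, not part of the committed file): the first T rows of `A_ub` say that the candidate quads keyed by a triangle's three edges may carry a total weight of at most 1, so after thresholding at 0.5 each triangle should be covered by at most one selected quad. The helper name `check_triangle_coverage` is made up for illustration.

import numpy as np

def check_triangle_coverage(face2edge, quads_valid, quads_weight, thresh=0.5):
    """Return True if no triangle has more than one selected candidate quad."""
    E = quads_valid.shape[0]
    Q = quads_weight.shape[0]
    edge_valid = -np.ones(E, dtype=np.int64)
    edge_valid[quads_valid] = np.arange(Q)            # edge index -> candidate quad index (or -1)
    selected = quads_weight > thresh                  # [Q] quads kept after thresholding
    quad_of_edge = edge_valid[face2edge]              # [T, 3] candidate quad keyed by each triangle edge
    per_tri = np.where(quad_of_edge >= 0, selected[quad_of_edge], False).sum(axis=-1)
    return bool((per_tri <= 1).all())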
+ def sovle_quad_qp(
+     face2edge: np.ndarray,
+     edge2face: np.ndarray,
+     quad2adj: np.ndarray,
+     quads_distortion: np.ndarray,
+     quads_smoothness: np.ndarray,
+     quads_valid: np.ndarray,
+ ):
+     """
+     Solve the quad mesh from the candidate quad faces.
+
+     Args:
+         face2edge (np.ndarray): [T, 3] face to edge relation
+         edge2face (np.ndarray): [E, 2] edge to face relation
+         quad2adj (np.ndarray): [Q, 8] adjacent quad faces of each quad face
+         quads_distortion (np.ndarray): [Q] distortion of each quad face
+         quads_smoothness (np.ndarray): [Q, 8] smoothness of each quad face connection
+         quads_valid (np.ndarray): [E] whether the quad corresponding to the edge is valid
+
+     Returns:
+         weights (np.ndarray): [Q] weight of each valid quad face
+     """
+     T = face2edge.shape[0]
+     E = edge2face.shape[0]
+     Q = quads_distortion.shape[0]
+     edge_valid = -np.ones(E, dtype=np.int32)
+     edge_valid[quads_valid] = np.arange(Q)
+
+     # Construct the quadratic programming problem
+     C_smoothness_triplet = np.stack([
+         np.arange(Q)[:, None].repeat(8, axis=1)[quad2adj != -1],
+         quad2adj[quad2adj != -1],
+         5 * quads_smoothness[quad2adj != -1],
+     ], axis=-1) # [C, 3]
+     # C_smoothness_triplet = np.concatenate([
+     #     C_smoothness_triplet,
+     #     np.stack([np.arange(Q), np.arange(Q), 20*np.ones(Q)], axis=1),
+     # ], axis=0) # [C+Q, 3]
+     C_smoothness = sp.sparse.coo_matrix((C_smoothness_triplet[:, 2], (C_smoothness_triplet[:, 0], C_smoothness_triplet[:, 1])), shape=[Q, Q]) # [Q, Q]
+     C_smoothness = C_smoothness.tocsc()
+     C_dist = quads_distortion - 20 # [Q]
+
+     A_eq = sp.sparse.coo_matrix((np.zeros(Q), (np.zeros(Q), np.arange(Q))), shape=[1, Q]) # [1, Q]
+     A_eq = A_eq.tocsc()
+     b_eq = np.array([0])
+
+     A_ub_triplet = np.concatenate([
+         np.stack([np.arange(T), edge_valid[face2edge[:, 0]], np.ones(T)], axis=1), # [T, 3]
+         np.stack([np.arange(T), edge_valid[face2edge[:, 1]], np.ones(T)], axis=1), # [T, 3]
+         np.stack([np.arange(T), edge_valid[face2edge[:, 2]], np.ones(T)], axis=1), # [T, 3]
+     ], axis=0) # [3T, 3]
+     A_ub_triplet = A_ub_triplet[A_ub_triplet[:, 1] != -1] # [3T', 3]
+     A_ub = sp.sparse.coo_matrix((A_ub_triplet[:, 2], (A_ub_triplet[:, 0], A_ub_triplet[:, 1])), shape=[T, Q]) # [T, Q]
+     A_ub = A_ub.tocsc()
+     b_ub = np.ones(T)
+
+     lb = np.zeros(Q)
+     ub = np.ones(Q)
+
+     import piqp
+     solver = piqp.SparseSolver()
+     solver.settings.verbose = True
+     solver.settings.compute_timings = True
+     solver.setup(C_smoothness, C_dist, A_eq, b_eq, A_ub, b_ub, lb, ub)
+
+     status = solver.solve()
+
+     # x = cp.Variable(Q)
+     # prob = cp.Problem(
+     #     cp.Minimize(cp.quad_form(x, C_smoothness) + C_dist.T @ x),
+     #     [
+     #         A_ub @ x <= b_ub,
+     #         x >= 0, x <= 1,
+     #     ]
+     # )
+
+     # # Solve the quadratic programming problem
+     # prob.solve(solver=cp.PIQP, verbose=True)
+
+     # Return the result
+     weights = solver.result.x
+     return weights
+
+
+ def tri_to_quad(
+     vertices: np.ndarray,
+     faces: np.ndarray,
+ ) -> Tuple[np.ndarray, np.ndarray]:
+     """
+     Convert a triangle mesh to a quad mesh.
+     NOTE: The input mesh must be a manifold mesh.
+
+     Args:
+         vertices (np.ndarray): [N, 3] 3-dimensional vertices
+         faces (np.ndarray): [T, 3] triangular face indices
+
+     Returns:
+         vertices (np.ndarray): [N_, 3] 3-dimensional vertices
+         faces (np.ndarray): [Q, 4] quad face indices
+     """
+     raise NotImplementedError
+
+
+ if __name__ == '__main__':
+     import os
+     import sys
+     sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
+     import utils3d
+     import numpy as np
+     import cv2
+     from vis import vis_edge_color
+
+     file = 'miku'
+
+     vertices, faces = utils3d.io.read_ply(f'test/assets/{file}.ply')
+     edges, edge2face, face2edge, face2face = calc_relations(faces)
+     quad_cands, quad2edge, quad2adj, quad_valid = calc_quad_candidates(edges, face2edge, edge2face)
+     distortion = calc_quad_distortion(vertices, quad_cands)
+     direction = calc_quad_direction(vertices, quad_cands)
+     smoothness = calc_quad_smoothness(quad2edge, quad2adj, direction)
+     boundary_edges = edges[edge2face[:, 1] == -1]
+     quads_weight, conn_min_weight, conn_max_weight = sovle_quad(face2edge, edge2face, quad2adj, distortion, smoothness, quad_valid)
+     quads = quad_cands[quads_weight > 0.5]
+     print('Mesh statistics')
+     print(f' #V = {vertices.shape[0]}')
+     print(f' #F = {faces.shape[0]}')
+     print(f' #E = {edges.shape[0]}')
+     print(f' #B = {boundary_edges.shape[0]}')
+     print(f' #Q_cand = {quad_cands.shape[0]}')
+     print(f' #Q = {quads.shape[0]}')
+
+     utils3d.io.write_ply(f'test/assets/{file}_boundary_edges.ply', vertices=vertices, edges=boundary_edges)
+     utils3d.io.write_ply(f'test/assets/{file}_quad_candidates.ply', vertices=vertices, faces=quads)
+
+     edge_colors = np.zeros([edges.shape[0], 3], dtype=np.uint8)
+     distortion = (distortion - distortion.min()) / (distortion.max() - distortion.min())
+     distortion = (distortion * 255).astype(np.uint8)
+     edge_colors[quad_valid] = cv2.cvtColor(cv2.applyColorMap(distortion, cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
+     utils3d.io.write_ply(f'test/assets/{file}_quad_candidates_distortion.ply', **vis_edge_color(vertices, edges, edge_colors))
+
+     edge_colors = np.zeros([edges.shape[0], 3], dtype=np.uint8)
+     edge_colors[quad_valid] = cv2.cvtColor(cv2.applyColorMap((quads_weight * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
+     utils3d.io.write_ply(f'test/assets/{file}_quad_candidates_weights.ply', **vis_edge_color(vertices, edges, edge_colors))
+     utils3d.io.write_ply(f'test/assets/{file}_quad.ply', vertices=vertices, faces=quads)
+
+     quad_centers = vertices[quad_cands].mean(axis=1)
+     conns = np.stack([
+         np.arange(quad_cands.shape[0])[:, None].repeat(8, axis=1),
+         quad2adj,
+     ], axis=-1)[quad2adj != -1] # [C, 2]
+     conns, conns_idx = np.unique(np.sort(conns, axis=-1), axis=0, return_index=True) # [C, 2], [C]
+     smoothness = smoothness[quad2adj != -1][conns_idx] # [C]
+     conns_color = cv2.cvtColor(cv2.applyColorMap((smoothness * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
+     utils3d.io.write_ply(f'test/assets/{file}_quad_conn_smoothness.ply', **vis_edge_color(quad_centers, conns, conns_color))
+     conns_color = cv2.cvtColor(cv2.applyColorMap((conn_min_weight * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
+     utils3d.io.write_ply(f'test/assets/{file}_quad_conn_min.ply', **vis_edge_color(quad_centers, conns, conns_color))
+     conns_color = cv2.cvtColor(cv2.applyColorMap((conn_max_weight * 255).astype(np.uint8), cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB).reshape(-1, 3)
+     utils3d.io.write_ply(f'test/assets/{file}_quad_conn_max.ply', **vis_edge_color(quad_centers, conns, conns_color))
+
+