import os

import numpy as np
import torch
import torchvision.transforms.functional as torchvision_F
from PIL import Image
from transparent_background import Remover

import spar3d.models.utils as spar3d_utils


def get_device():
    """Select the compute device, honoring the SPAR3D_USE_CPU override."""
    if os.environ.get("SPAR3D_USE_CPU", "0") == "1":
        return "cpu"

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    return device


def create_intrinsic_from_fov_rad(fov_rad: float, cond_height: int, cond_width: int):
    """Build a pinhole intrinsic matrix from a field of view in radians.

    Returns both the pixel-space intrinsic and a copy whose focal lengths
    and principal point are normalized by the image dimensions.
    """
    intrinsic = spar3d_utils.get_intrinsic_from_fov(
        fov_rad,
        H=cond_height,
        W=cond_width,
    )
    intrinsic_normed_cond = intrinsic.clone()
    intrinsic_normed_cond[..., 0, 2] /= cond_width
    intrinsic_normed_cond[..., 1, 2] /= cond_height
    intrinsic_normed_cond[..., 0, 0] /= cond_width
    intrinsic_normed_cond[..., 1, 1] /= cond_height

    return intrinsic, intrinsic_normed_cond


def create_intrinsic_from_fov_deg(fov_deg: float, cond_height: int, cond_width: int):
    return create_intrinsic_from_fov_rad(np.deg2rad(fov_deg), cond_height, cond_width)
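
# Worked example (illustrative; not part of the original module). Assuming the
# standard pinhole relation f = W / (2 * tan(fov / 2)), a 60-degree FOV on a
# 512x512 conditioning image gives a focal length of about 443 px, i.e. roughly
# 0.87 after normalization, with a normalized principal point of (0.5, 0.5):
#
#     intrinsic, intrinsic_normed = create_intrinsic_from_fov_deg(60.0, 512, 512)
#     # intrinsic[..., 0, 0] ~= 443.4, intrinsic_normed[..., 0, 0] ~= 0.866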


def default_cond_c2w(distance: float):
    """Default conditioning camera-to-world pose: the camera is placed at
    `distance` along the world x-axis with a fixed axis permutation."""
    c2w_cond = torch.as_tensor(
        [
            [0, 0, 1, distance],
            [1, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 0, 0, 1],
        ]
    ).float()
    return c2w_cond


def normalize_pc_bbox(pc, scale=1.0):
    """Center a point cloud on its bounding box and rescale so the largest
    bounding-box extent equals `scale`. Extra per-point channels (e.g. colors
    or normals) are passed through unchanged."""
    assert len(pc.shape) in [2, 3] and pc.shape[-1] in [3, 6, 9]
    n_dim = len(pc.shape)
    device = pc.device
    pc = pc.cpu()
    if n_dim == 2:
        pc = pc.unsqueeze(0)
    normalized_pc = []
    for b in range(pc.shape[0]):
        xyz = pc[b, :, :3]  # [N, 3]
        bound_x = (xyz[:, 0].max(), xyz[:, 0].min())
        bound_y = (xyz[:, 1].max(), xyz[:, 1].min())
        bound_z = (xyz[:, 2].max(), xyz[:, 2].min())
        # center of the bounding box
        center = torch.stack(
            [
                (bound_x[0] + bound_x[1]) / 2,
                (bound_y[0] + bound_y[1]) / 2,
                (bound_z[0] + bound_z[1]) / 2,
            ]
        )
        # largest extent of the bounding box; kept distinct from the `scale`
        # argument, which the original code shadowed (making it a no-op)
        extent = max(
            bound_x[0] - bound_x[1], bound_y[0] - bound_y[1], bound_z[0] - bound_z[1]
        )
        xyz = (xyz - center) / extent * scale
        extra = pc[b, :, 3:]
        normalized_pc.append(torch.cat([xyz, extra], dim=-1))
    return (
        torch.stack(normalized_pc, dim=0).to(device)
        if n_dim == 3
        else normalized_pc[0].to(device)
    )
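
# Minimal usage sketch for normalize_pc_bbox (illustrative; the random cloud
# below is an assumption, not part of the original module). Extra per-point
# channels ride along unchanged while xyz is recentered and rescaled:
#
#     pc = torch.rand(1024, 6)  # xyz + rgb
#     pc_n = normalize_pc_bbox(pc)
#     # pc_n[:, :3] is centered on the bbox midpoint and its largest axis
#     # spans `scale` (1.0 by default); pc_n[:, 3:] equals pc[:, 3:].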


def remove_background(
    image: Image.Image,
    bg_remover: Remover = None,
    force: bool = False,
    **transparent_background_kwargs,
) -> Image.Image:
    """Run background removal unless the image already carries transparency.

    Removal is skipped when the image is RGBA with at least one non-opaque
    pixel; `force=True` overrides this. A `bg_remover` must be supplied
    whenever removal actually runs.
    """
    do_remove = True
    if image.mode == "RGBA" and image.getextrema()[3][0] < 255:
        # The alpha channel already has transparent pixels; treat it as a mask.
        do_remove = False
    do_remove = do_remove or force
    if do_remove:
        image = bg_remover.process(
            image.convert("RGB"), **transparent_background_kwargs
        )
    return image


def get_1d_bounds(arr):
    """Indices of the first and last nonzero entries of a 1-D array."""
    nz = np.flatnonzero(arr)
    return nz[0], nz[-1]


def get_bbox_from_mask(mask, thr=0.5):
    """Tight bounding box (x0, y0, x1, y1) of mask values above `thr`."""
    masks_for_box = (mask > thr).astype(np.float32)
    assert masks_for_box.sum() > 0, "Empty mask!"
    # Project the binarized mask onto each axis to find the nonzero extents.
    x0, x1 = get_1d_bounds(masks_for_box.sum(axis=-2))
    y0, y1 = get_1d_bounds(masks_for_box.sum(axis=-1))
    return x0, y0, x1, y1


def foreground_crop(image_rgba, crop_ratio=1.3, newsize=None, no_crop=False):
    """Crop a square region around the alpha-mask foreground, padded by
    `crop_ratio`, and optionally resize the result."""
    assert image_rgba.mode == "RGBA", "Image must be in RGBA mode!"
    if not no_crop:
        # Binarize the alpha channel and find the foreground bounding box.
        mask_np = np.array(image_rgba)[:, :, -1]
        mask_np = (mask_np >= 1).astype(np.float32)
        x1, y1, x2, y2 = get_bbox_from_mask(mask_np, thr=0.5)
        h, w = y2 - y1, x2 - x1
        yc, xc = (y1 + y2) / 2, (x1 + x2) / 2
        # Square crop centered on the bbox, enlarged by crop_ratio.
        scale = max(h, w) * crop_ratio
        image = torchvision_F.crop(
            image_rgba,
            top=int(yc - scale / 2),
            left=int(xc - scale / 2),
            height=int(scale),
            width=int(scale),
        )
    else:
        image = image_rgba
    # resize if needed
    if newsize is not None:
        image = image.resize(newsize)
    return image
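
# End-to-end preprocessing sketch (illustrative; "input.png" is a placeholder
# and the Remover defaults follow the transparent_background docs, so treat
# this as an assumption rather than the canonical pipeline). It chains the
# helpers above: segment the foreground, crop around it, then resize to the
# conditioning resolution:
#
#     bg_remover = Remover()  # defaults to RGBA output per its docs
#     img = Image.open("input.png")
#     img = remove_background(img, bg_remover)
#     img = foreground_crop(img, crop_ratio=1.3, newsize=(512, 512))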