tuandunghcmut's picture
Add files using upload-large-folder tool
391089d verified
raw
history blame
2.74 kB
import torch
import numpy as np
import torch.nn.functional as F
from pycocotools import mask as mask_utils
def grounding_image_ecoder_preprocess(x, pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
                                      pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
                                      img_size=1024) -> torch.Tensor:
    """Normalize pixel values and zero-pad the image to a square input.

    Args:
        x: Image tensor whose last two dimensions are (height, width). It must
            broadcast against ``pixel_mean``/``pixel_std``; the defaults are
            shaped (3, 1, 1).
        pixel_mean: Per-channel mean subtracted from ``x``.
        pixel_std: Per-channel standard deviation ``x`` is divided by.
        img_size: Side length of the padded square output.

    Returns:
        The normalized tensor, padded on the right and bottom so that its last
        two dimensions are ``img_size`` x ``img_size``.
    """
    # The default statistics are created on the CPU when the function is
    # defined; move them next to ``x`` so inputs on another device do not fail
    # with a device-mismatch error. dtype is left alone so the usual type
    # promotion of ``x - pixel_mean`` is unchanged.
    if isinstance(pixel_mean, torch.Tensor):
        pixel_mean = pixel_mean.to(x.device)
    if isinstance(pixel_std, torch.Tensor):
        pixel_std = pixel_std.to(x.device)

    # Normalize colors
    x = (x - pixel_mean) / pixel_std

    # Pad: F.pad takes (left, right, top, bottom) for the last two dimensions,
    # so only the right and bottom edges grow.
    # NOTE(review): an input larger than img_size yields negative pad amounts;
    # callers are presumably expected to resize beforehand - confirm.
    h, w = x.shape[-2:]
    padh = img_size - h
    padw = img_size - w
    x = F.pad(x, (0, padw, 0, padh))
    return x
def mask_to_rle_pytorch(tensor: torch.Tensor):
    """
    Convert a batch of masks into uncompressed run-length encodings laid out
    the way pycocotools expects.

    `tensor` is unpacked as (batch, height, width) and must support `^`
    (boolean or integer values). One dict per mask is returned, holding
    "size" ([height, width]) and "counts" (run lengths over the column-major
    flattening, always beginning with a run of zeros).
    """
    num_masks, height, width = tensor.shape
    total = height * width

    # Swap h and w before flattening so pixels are visited column by column
    flat = tensor.permute(0, 2, 1).flatten(1)

    # A set bit marks a position whose right-hand neighbour holds a different value
    toggles = flat[:, :-1] ^ flat[:, 1:]
    mask_ids, positions = torch.nonzero(toggles, as_tuple=True)

    encoded = []
    for idx in range(num_masks):
        # Start offsets of every run after the first, for this mask only
        flips = positions[mask_ids == idx] + 1
        edges = torch.cat(
            [flips.new_tensor([0]), flips, flips.new_tensor([total])]
        )
        run_lengths = (edges[1:] - edges[:-1]).detach().cpu().tolist()
        # A mask that opens with foreground needs an empty leading zero-run
        leading = [0] if flat[idx, 0] != 0 else []
        encoded.append({"size": [height, width], "counts": leading + run_lengths})
    return encoded
def mask_to_rle_numpy(mask: np.ndarray):
    """
    Convert a single 2-D mask into an uncompressed run-length encoding laid
    out the way pycocotools expects.

    `mask` must support `^` (boolean or integer values). The result is a dict
    holding "size" ([height, width]) and "counts" (run lengths over the
    column-major flattening, always beginning with a run of zeros).
    """
    height, width = mask.shape

    # Column-major (Fortran) order: walk down each column in turn
    column_major = mask.flatten(order="F")

    # Offsets at which the value differs from the previous pixel
    flips = np.flatnonzero(column_major[:-1] ^ column_major[1:]) + 1

    # Bracket the flips with the start and end of the data, then measure gaps
    edges = np.concatenate(([0], flips, [height * width]))
    run_lengths = (edges[1:] - edges[:-1]).tolist()

    # A mask that opens with foreground needs an empty leading zero-run
    leading = [0] if column_major[0] != 0 else []
    return {"size": [height, width], "counts": leading + run_lengths}
def coco_encode_rle(uncompressed_rle):
    """
    Compress an uncompressed RLE dict with pycocotools.

    The input's "size" entry supplies the height and width handed to
    `mask_utils.frPyObjects`. The "counts" of the result is decoded to a
    UTF-8 string so the returned dict can be serialized with json.
    """
    height, width = uncompressed_rle["size"]
    compressed = mask_utils.frPyObjects(uncompressed_rle, height, width)
    # Swap the encoded counts for text; json cannot serialize the undecoded value
    counts_text = compressed["counts"].decode("utf-8")
    compressed["counts"] = counts_text
    return compressed
def compute_iou(mask1, mask2):
    """Return the intersection-over-union of two masks.

    Both inputs are combined element-wise with ``np.logical_and`` and
    ``np.logical_or``, so any non-zero element counts as foreground and the
    two masks must be broadcast-compatible.

    Returns:
        Number of elements set in both masks divided by the number set in at
        least one of them. When neither mask has any foreground the union is
        empty and 0.0 is returned instead of dividing zero by zero (which
        would yield NaN and a RuntimeWarning).
    """
    union_area = np.sum(np.logical_or(mask1, mask2))
    if union_area == 0:
        # Two empty masks: IoU is undefined, report no overlap.
        return 0.0
    intersection_area = np.sum(np.logical_and(mask1, mask2))
    return intersection_area / union_area
def bbox_to_x1y1x2y2(bbox):
    """Convert a box given as (x1, y1, width, height) into [x1, y1, x2, y2]."""
    left, top, width, height = bbox
    # The far corner is the near corner shifted by the box extent
    right = left + width
    bottom = top + height
    return [left, top, right, bottom]