|
|
import math |
|
|
from typing import List |
|
|
|
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
def _yolo_xywh2xyxy(x: np.ndarray) -> np.ndarray: |
|
|
""" |
|
|
Copied from yolov8. |
|
|
|
|
|
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the |
|
|
top-left corner and (x2, y2) is the bottom-right corner. |
|
|
|
|
|
Args: |
|
|
x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. |
|
|
Returns: |
|
|
y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. |
|
|
""" |
|
|
y = np.copy(x) |
|
|
y[..., 0] = x[..., 0] - x[..., 2] / 2 |
|
|
y[..., 1] = x[..., 1] - x[..., 3] / 2 |
|
|
y[..., 2] = x[..., 0] + x[..., 2] / 2 |
|
|
y[..., 3] = x[..., 1] + x[..., 3] / 2 |
|
|
return y |
|
|
|
|
|
|
|
|
def _yolo_nms(boxes, scores, thresh: float = 0.7) -> List[int]: |
|
|
""" |
|
|
dets: ndarray, (num_boxes, 5) |
|
|
每一行表示一个bounding box:[xmin, ymin, xmax, ymax, score] |
|
|
其中xmin, ymin, xmax, ymax分别表示框的左上角和右下角坐标,score表示框的分数 |
|
|
thresh: float |
|
|
两个框的IoU阈值 |
|
|
""" |
|
|
x1 = boxes[:, 0] |
|
|
y1 = boxes[:, 1] |
|
|
x2 = boxes[:, 2] |
|
|
y2 = boxes[:, 3] |
|
|
areas = (x2 - x1 + 1) * (y2 - y1 + 1) |
|
|
|
|
|
|
|
|
order = scores.argsort()[::-1] |
|
|
|
|
|
keep = [] |
|
|
while order.size > 0: |
|
|
i = order[0] |
|
|
keep.append(i) |
|
|
|
|
|
xx1 = np.maximum(x1[i], x1[order[1:]]) |
|
|
yy1 = np.maximum(y1[i], y1[order[1:]]) |
|
|
xx2 = np.minimum(x2[i], x2[order[1:]]) |
|
|
yy2 = np.minimum(y2[i], y2[order[1:]]) |
|
|
|
|
|
w = np.maximum(0.0, xx2 - xx1 + 1) |
|
|
h = np.maximum(0.0, yy2 - yy1 + 1) |
|
|
|
|
|
inter = w * h |
|
|
iou = inter / (areas[i] + areas[order[1:]] - inter) |
|
|
|
|
|
|
|
|
inds = np.where(iou <= thresh)[0] |
|
|
order = order[inds + 1] |
|
|
|
|
|
return keep |
|
|
|
|
|
|
|
|
def _image_preprocess(image: Image.Image, max_infer_size: int = 1216, align: int = 32):
    """
    Resize an image to the size used for inference.

    If the longer side of ``image`` exceeds ``max_infer_size``, both sides are
    scaled down by the same factor so that the longer side equals
    ``max_infer_size``. Each side is then rounded up to the next multiple of
    ``align`` and the image is resized to exactly that size, so the aspect
    ratio may change slightly.

    Args:
        image (Image.Image): The input image; its ``width`` and ``height`` are read.
        max_infer_size (int): Upper bound for the longer side before alignment.
        align (int): Both output sides are multiples of this value.

    Returns:
        tuple: ``(resized_image, (old_width, old_height), (new_width, new_height))``.

    Raises:
        ZeroDivisionError: If both sides of ``image`` are zero.
    """
    old_width, old_height = image.width, image.height
    longest_side = max(old_width, old_height)
    new_width, new_height = old_width, old_height

    if max_infer_size / longest_side < 1:
        # Multiply before dividing: ``side * (max_infer_size / longest_side)`` can
        # come out one ulp above ``max_infer_size`` for the longer side, and the
        # ceil below would then add a whole extra ``align`` step.
        new_width = old_width * max_infer_size / longest_side
        new_height = old_height * max_infer_size / longest_side

    new_width = int(math.ceil(new_width / align) * align)
    new_height = int(math.ceil(new_height / align) * align)
    image = image.resize((new_width, new_height))
    return image, (old_width, old_height), (new_width, new_height)
|
|
|
|
|
|
|
|
def _xy_postprocess(x, y, old_size, new_size): |
|
|
old_width, old_height = old_size |
|
|
new_width, new_height = new_size |
|
|
x, y = x / new_width * old_width, y / new_height * old_height |
|
|
x = int(np.clip(x, a_min=0, a_max=old_width).round()) |
|
|
y = int(np.clip(y, a_min=0, a_max=old_height).round()) |
|
|
return x, y |
|
|
|
|
|
|
|
|
def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size):
    """
    Turn a raw detector output into a list of scored boxes on the original image.

    Steps: drop candidates at or below ``conf_threshold``, convert the survivors
    from (x, y, width, height) to corner format, run NMS with ``iou_threshold``,
    and map the remaining corners back to the original image size.

    Args:
        output: 2-D array with one candidate per column. Rows 0-3 are read as
            the (x, y, width, height) box and row 4 as the score.
        conf_threshold: Candidates are kept only when their confidence is
            strictly greater than this value.
        iou_threshold: IoU threshold passed to ``_yolo_nms``.
        old_size: ``(width, height)`` of the original image.
        new_size: ``(width, height)`` of the image the detector ran on.

    Returns:
        list: ``((x0, y0, x1, y1), score)`` tuples with int coordinates on the
        original image and a float score, highest score first. Empty when no
        candidate passes ``conf_threshold``.
    """
    # NOTE(review): the filter reads the LAST row while the scores below read
    # row 4; these agree only when ``output`` has exactly 5 rows - confirm.
    output = output[:, output[-1, :] > conf_threshold]
    if output.shape[1] == 0:
        # Nothing passed the threshold; np.stack on an empty list would raise.
        return []

    scores = output[4, :]
    # A stable sort keeps tied candidates in their original relative order.
    order = np.argsort(-scores, kind="stable")
    boxes = _yolo_xywh2xyxy(output[:4, :].transpose(1, 0)[order])
    scores = scores[order]

    idx = _yolo_nms(boxes, scores, thresh=iou_threshold)
    boxes, scores = boxes[idx], scores[idx]

    detections = []
    for box, score in zip(boxes, scores):
        x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)
        x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
        detections.append(((x0, y0, x1, y1), float(score)))

    return detections
|
|
|