import math

import datasets
import evaluate
import numpy as np
import torch

from seametrics.payload import Payload
_CITATION = """\
@article{milan2016mot16,
    title={MOT16: A benchmark for multi-object tracking},
    author={Milan, Anton and Leal-Taix{\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
    journal={arXiv preprint arXiv:1603.00831},
    year={2016}
}
"""
_DESCRIPTION = """\
The MOT Metrics module is designed to evaluate multi-object tracking (MOT)
algorithms by computing various metrics based on predicted and ground truth
bounding boxes. It serves as a crucial tool in assessing the performance of MOT
systems, aiding in the iterative improvement of tracking algorithms."""
_KWARGS_DESCRIPTION = """
Calculates bounding-box localization metrics for predictions given references.
Args:
    predictions: list of predicted boxes to score, one tensor per frame.
        Each box is in (x1, y1, x2, y2, conf, label) format.
    references: list of reference boxes, one tensor per frame.
        Each box is in (label, x1, y1, x2, y2) format.
    iou_threshold (`float`, *optional*):
        Minimum Intersection over Union (IoU) for a label/detection pair to
        count as a true positive. Default is 0.01.
"""
class box_metrics(evaluate.Metric):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.boxes = {}
        self.gt_field = "ground_truth_det"
    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference.
            features=datasets.Features({
                "predictions": datasets.Sequence(
                    datasets.Sequence(datasets.Value("float"))
                ),
                "references": datasets.Sequence(
                    datasets.Sequence(datasets.Value("float"))
                ),
            }),
            # Additional links to the codebase or references.
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"],
        )
    def add_payload(self, payload: Payload):
        """Convert a payload to the format of the tracking metrics library."""
        self.add(payload)

    def add(self, payload: Payload):
        """Convert a payload to the format of the tracking metrics library."""
        self.gt_field = payload.gt_field_name
        for sequence in payload.sequences:
            self.boxes[sequence] = {}
            target = payload.sequences[sequence][self.gt_field]
            resolution = payload.sequences[sequence]["resolution"]
            target_tm = self.payload_labels_to_tm(target, resolution)
            self.boxes[sequence][self.gt_field] = target_tm
            for model in payload.models:
                preds = payload.sequences[sequence][model]
                preds_tm = self.payload_preds_to_rm(preds, resolution)
                self.boxes[sequence][model] = preds_tm
    def add_batch(self, predictions, references, sequence_name="sequence"):
        """Add a batch of predictions and references to the metric.
        Mainly for testing purposes.
        references: list of boxes as [n, 5] tensors,
            box format: label, x1, y1, x2, y2
        predictions: dict of {model_name: list of boxes as [n, 6] tensors},
            box format: x1, y1, x2, y2, conf, label
        """
        self.boxes[sequence_name] = {}
        self.boxes[sequence_name][self.gt_field] = references
        for model in predictions:
            self.boxes[sequence_name][model] = predictions[model]
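
    # A minimal usage sketch for `add_batch` (hypothetical tensors: two
    # frames, one box per frame, one model):
    #
    #   metric = box_metrics()
    #   refs = [torch.tensor([[0., 10., 10., 50., 80.]]) for _ in range(2)]
    #   preds = {"model_a": [torch.tensor([[12., 12., 48., 78., 0.9, 0.]]) for _ in range(2)]}
    #   metric.add_batch(preds, refs, sequence_name="seq0")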
    def compute(self,
                iou_threshold: float = 0.01,
                only_tp: bool = True):
        """Compute the metric values per sequence and per model.

        Args:
            iou_threshold: minimum IoU for a label/detection pair to count
                as a match. Default is 0.01.
            only_tp: if True, keep at most one match per label and per
                detection, preferring higher IoU.
        """
        output = {}
        for sequence in self.boxes:
            output[sequence] = {}
            labels = self.boxes[sequence][self.gt_field]
            # Note: the ground truth field is stored alongside the models,
            # so it is also scored against itself here.
            for model in self.boxes[sequence]:
                # Reset the accumulators so each model's statistics reflect
                # only its own matches.
                ious = np.array([])
                beps = np.array([])
                e_bottom_x = np.array([])
                e_bottom_y = np.array([])
                e_widths = np.array([])
                e_heights = np.array([])
                e_n_widths = np.array([])
                e_n_heights = np.array([])
                e_n_bottom_x = np.array([])
                e_n_bottom_y = np.array([])
                detections = self.boxes[sequence][model]
                for i in range(len(detections)):
                    frame_labels = labels[i]
                    frame_detections = detections[i]
                    iou = self.box_iou(frame_labels[:, 1:], frame_detections[:, :4])
                    x = torch.where(iou > iou_threshold)
                    if x[0].shape[0]:
                        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
                        if x[0].shape[0] > 1 and only_tp:
                            # Greedy one-to-one matching: sort by IoU and keep
                            # each detection and each label at most once.
                            matches = matches[matches[:, 2].argsort()[::-1]]
                            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                            matches = matches[matches[:, 2].argsort()[::-1]]
                            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                    else:
                        matches = np.zeros((0, 3))
                    labels_i, detections_i, ious_v = matches.transpose()
                    labels_i = labels_i.astype(int)
                    detections_i = detections_i.astype(int)
                    for pair in zip(labels_i, detections_i, ious_v):
                        iou = pair[2]
                        t_box = frame_labels[pair[0]][1:]
                        p_box = frame_detections[pair[1]][:4]
                        bep = bbox_bep(t_box.unsqueeze(0), p_box.unsqueeze(0))
                        if iou < 0:
                            raise ValueError("IoU should be greater than 0; please contact the code maintainer")
                        if bep < 0:
                            raise ValueError("BEP should be greater than 0; please contact the code maintainer")
                        # Bottom-edge center, width, and height of the target
                        # (t_) and predicted (p_) boxes.
                        t_xc = (t_box[0].item() + t_box[2].item()) / 2
                        p_xc = (p_box[0].item() + p_box[2].item()) / 2
                        t_yc = t_box[3].item()
                        p_yc = p_box[3].item()
                        t_w = t_box[2].item() - t_box[0].item()
                        p_w = p_box[2].item() - p_box[0].item()
                        t_h = t_box[3].item() - t_box[1].item()
                        p_h = p_box[3].item() - p_box[1].item()
                        ious = np.append(ious, iou)
                        beps = np.append(beps, bep)
                        e_widths = np.append(e_widths, p_w - t_w)
                        e_heights = np.append(e_heights, p_h - t_h)
                        e_bottom_x = np.append(e_bottom_x, p_xc - t_xc)
                        e_bottom_y = np.append(e_bottom_y, p_yc - t_yc)
                        e_n_widths = np.append(e_n_widths, (p_w - t_w) / t_w)
                        e_n_heights = np.append(e_n_heights, (p_h - t_h) / t_h)
                        e_n_bottom_x = np.append(e_n_bottom_x, (p_xc - t_xc) / t_w)
                        e_n_bottom_y = np.append(e_n_bottom_y, (p_yc - t_yc) / t_h)
                output[sequence][model] = {
                    "iou": np.mean(ious),
                    "bep": np.mean(beps),
                    "e_bottom_x_mean": np.mean(e_bottom_x),
                    "e_bottom_y_mean": np.mean(e_bottom_y),
                    "e_abs_bottom_x_mean": np.mean(np.abs(e_bottom_x)),
                    "e_abs_bottom_y_mean": np.mean(np.abs(e_bottom_y)),
                    "e_width_mean": np.mean(e_widths),
                    "e_height_mean": np.mean(e_heights),
                    "e_n_bottom_x_mean": np.mean(e_n_bottom_x),
                    "e_n_bottom_y_mean": np.mean(e_n_bottom_y),
                    "e_n_abs_bottom_x_mean": np.mean(np.abs(e_n_bottom_x)),
                    "e_n_abs_bottom_y_mean": np.mean(np.abs(e_n_bottom_y)),
                    "e_n_width_mean": np.mean(e_n_widths),
                    "e_n_height_mean": np.mean(e_n_heights),
                    "e_bottom_x_std": np.std(e_bottom_x),
                    "e_bottom_y_std": np.std(e_bottom_y),
                    "e_width_std": np.std(e_widths),
                    "e_height_std": np.std(e_heights),
                    "e_n_bottom_x_std": np.std(e_n_bottom_x),
                    "e_n_bottom_y_std": np.std(e_n_bottom_y),
                    "e_n_width_std": np.std(e_n_widths),
                    "e_n_height_std": np.std(e_n_heights),
                    "n_matches": len(e_n_heights),
                }
        return output
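
    # The returned structure is nested as {sequence: {model: {metric: value}}},
    # e.g. (illustrative numbers):
    #
    #   {"seq0": {"model_a": {"iou": 0.82, "bep": 0.88, ..., "n_matches": 2}}}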
    @staticmethod
    def summarize(result):
        """Summarize the results by model instead of by sequence and model."""
        summary = {}
        for sequence in result:
            for model in result[sequence]:
                if model not in summary:
                    summary[model] = {}
                for metric in result[sequence][model]:
                    if metric not in summary[model]:
                        summary[model][metric] = []
                    summary[model][metric].append(result[sequence][model][metric])
        # Average the per-sequence values for each model.
        for model in summary:
            for metric in summary[model]:
                summary[model][metric] = np.mean(summary[model][metric])
        return summary
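
    # Usage sketch: collapse per-sequence results into one entry per model
    # (hypothetical values):
    #
    #   result = metric.compute(iou_threshold=0.5)
    #   per_model = metric.summarize(result)  # {"model_a": {"iou": 0.82, ...}}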
    @staticmethod
    def payload_labels_to_tm(labels, resolution):
        """Convert the labels of a payload sequence to the format of torch metrics."""
        target_tm = []
        for frame in labels:
            target_tm_frame = []
            for det in frame:
                label = 0
                box = det["bounding_box"]
                # Normalized (x, y, w, h) -> absolute (x1, y1, x2, y2).
                x1, y1, x2, y2 = box[0], box[1], box[0] + box[2], box[1] + box[3]
                x1, y1, x2, y2 = x1 * resolution.width, y1 * resolution.height, x2 * resolution.width, y2 * resolution.height
                target_tm_frame.append([label, x1, y1, x2, y2])
            target_tm.append(torch.tensor(target_tm_frame) if len(target_tm_frame) > 0 else torch.empty((0, 5)))
        return target_tm
    @staticmethod
    def payload_preds_to_rm(preds, resolution):
        """Convert the predictions of a payload sequence to the format of torch metrics."""
        preds_tm = []
        for frame in preds:
            pred_tm_frame = []
            for det in frame:
                label = 0
                box = det["bounding_box"]
                # Normalized (x, y, w, h) -> absolute (x1, y1, x2, y2).
                x1, y1, x2, y2 = box[0], box[1], box[0] + box[2], box[1] + box[3]
                x1, y1, x2, y2 = x1 * resolution.width, y1 * resolution.height, x2 * resolution.width, y2 * resolution.height
                # Payload predictions carry no confidence score, so use 1.
                conf = 1
                pred_tm_frame.append([x1, y1, x2, y2, conf, label])
            preds_tm.append(torch.tensor(pred_tm_frame) if len(pred_tm_frame) > 0 else torch.empty((0, 6)))
        return preds_tm
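
    # Conversion sketch: a payload detection with a normalized xywh box
    # {"bounding_box": [0.1, 0.2, 0.3, 0.4]} on a 1000x800 frame becomes
    # [100.0, 160.0, 400.0, 480.0, 1, 0] in (x1, y1, x2, y2, conf, label).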
    @staticmethod
    def box_iou(box1, box2, eps=1e-7):
        """
        Return intersection-over-union (Jaccard index) of boxes.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            box1 (Tensor[N, 4])
            box2 (Tensor[M, 4])
        Returns:
            iou (Tensor[N, M]): the NxM matrix containing the pairwise
                IoU values for every element in box1 and box2
        """
        # Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
        # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
        (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
        inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
        # IoU = inter / (area1 + area2 - inter)
        return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
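
    # Worked example:
    #   box_iou(torch.tensor([[0., 0., 10., 10.]]),
    #           torch.tensor([[5., 0., 15., 10.]]))
    # intersection = 5 * 10 = 50, union = 100 + 100 - 50 = 150, IoU ~ 0.333.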
def bbox_bep(box1, box2, xywh=False, eps=1e-7, bep1=True):
    """
    Calculates bottom edge proximity (BEP) between two boxes.
    Input shapes are box1(1,4) to box2(n,4).
    Implements the bep1 (default) and bep2 variants from
    "Are object detection assessment criteria ready for maritime computer vision?"
    """
    # Get the coordinates of bounding boxes
    if xywh:  # transform from xywh to xyxy
        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
    else:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clamp(eps)
        w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clamp(eps)
    # Bottom edge distance (absolute value)
    # xb = torch.abs(b2_x2 - b1_x1)
    xb = torch.min(b2_x2 - b1_x1, b1_x2 - b2_x1)
    xa = w2 - xb
    xc = w1 - xb
    ybe = torch.abs(b2_y2 - b1_y2)
    X2 = xb / (xb + xa)
    Y2 = 1 - ybe / h2
    X1 = xb / (xb + xa + xc + eps)
    Y1 = 1 - ybe / (torch.max(h2, h1) + eps)
    bep = X1 * Y1 if bep1 else X2 * Y2
    return bep
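
# Worked example (bep1): two identical 10x10 boxes shifted 2 px horizontally,
#   bbox_bep(torch.tensor([[0., 0., 10., 10.]]), torch.tensor([[2., 0., 12., 10.]]))
# gives xb = 8, xa = xc = 2, ybe = 0, so X1 = 8/12, Y1 = 1 and BEP ~ 0.667.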
def bbox_iou(box1, box2, xywh=False, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Calculates IoU, GIoU, DIoU, or CIoU between two boxes, supporting xywh/xyxy formats.
    Input shapes are box1(1,4) to box2(n,4).
    """
    # Get the coordinates of bounding boxes
    if xywh:  # transform from xywh to xyxy
        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
    else:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clamp(eps)
        w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clamp(eps)
    # Intersection area
    inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * (
        b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)
    ).clamp(0)
    # Union Area
    union = w1 * h1 + w2 * h2 - inter + eps
    # IoU
    iou = inter / union
    if CIoU or DIoU or GIoU:
        cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1)  # convex (smallest enclosing box) width
        ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw**2 + ch**2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist ** 2
            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi**2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
            return iou - rho2 / c2  # DIoU
        c_area = cw * ch + eps  # convex area
        return iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
    return iou  # IoU
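

if __name__ == "__main__":
    # Minimal smoke test (illustrative tensors, not a benchmark): one model,
    # one sequence, two frames with a single slightly shifted prediction each.
    # Note the ground truth field is also scored against itself, which may
    # produce NaN entries for it here.
    metric = box_metrics()
    references = [
        torch.tensor([[0.0, 10.0, 10.0, 50.0, 80.0]]),  # label, x1, y1, x2, y2
        torch.tensor([[0.0, 20.0, 15.0, 60.0, 90.0]]),
    ]
    predictions = {
        "model_a": [
            torch.tensor([[12.0, 12.0, 48.0, 78.0, 0.9, 0.0]]),  # x1, y1, x2, y2, conf, label
            torch.tensor([[22.0, 16.0, 62.0, 88.0, 0.8, 0.0]]),
        ]
    }
    metric.add_batch(predictions, references, sequence_name="seq0")
    result = metric.compute(iou_threshold=0.5)
    print(metric.summarize(result))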