Spaces:
Build error
Build error
| import os | |
| import cv2 | |
| import torch | |
| from torchvision import transforms as T | |
| import torch.nn as nn | |
| from maskrcnn_benchmark.modeling.detector import build_detection_model | |
| from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer | |
| from maskrcnn_benchmark.structures.image_list import to_image_list | |
| from maskrcnn_benchmark.config import cfg | |
| from maskrcnn_benchmark.utils.chars import getstr_grid, get_tight_rect | |
| from maskrcnn_benchmark.data.datasets.evaluation.word.alfashape import getAlfaShapes | |
| from maskrcnn_benchmark.modeling.roi_heads.boundary_head.inference import Masker | |
| from shapely.geometry import * | |
| import random | |
| from torchvision.transforms import functional as F | |
| from PIL import Image | |
| import numpy as np | |
| import argparse | |
class Resize(object):
    """Rescale an image so that its shorter side matches a target length.

    The target is drawn at random from ``min_size`` on every call. When
    ``max_size`` is not ``None`` the target is reduced so that the longer
    side stays within ``max_size`` (up to integer rounding).
    """

    def __init__(self, min_size, max_size):
        # A bare number is treated as a one-element collection of candidates.
        if isinstance(min_size, (list, tuple)):
            self.min_size = min_size
        else:
            self.min_size = (min_size,)
        self.max_size = max_size

    # modified from torchvision to add support for max size
    def get_size(self, image_size):
        """Map a ``(width, height)`` pair to the ``(height, width)`` to resize to."""
        width, height = image_size
        target = random.choice(self.min_size)
        limit = self.max_size
        if limit is not None:
            short_side = float(min((width, height)))
            long_side = float(max((width, height)))
            # Scaling the short side to `target` would push the long side
            # past the limit, so scale the long side to the limit instead.
            if long_side / short_side * target > limit:
                target = int(round(limit * short_side / long_side))

        # The shorter side already has the requested length: nothing to do.
        if (width <= height and width == target) or (height <= width and height == target):
            return (height, width)

        if width < height:
            return (int(target * height / width), target)
        return (target, int(target * width / height))

    def __call__(self, image):
        """Resize ``image`` (anything exposing ``.size`` as ``(width, height)``)."""
        return F.resize(image, self.get_size(image.size))
| class DetDemo(object): | |
def __init__(
    self,
    cfg,
    confidence_threshold=0.7,
    min_image_size=(1200, 2000),
    output_polygon=True
):
    """Build the detection model described by ``cfg`` and load its weights.

    Arguments:
        cfg: configuration node; must provide ``clone()``, ``MODEL.DEVICE``,
            ``MODEL.WEIGHT`` and ``OUTPUT_DIR``.
        confidence_threshold (float): stored on the instance.
        min_image_size (tuple): stored on the instance.
        output_polygon (bool): stored on the instance.
    """
    # Plain settings, stored exactly as given.
    self.confidence_threshold = confidence_threshold
    self.min_image_size = min_image_size
    self.output_polygon = output_polygon

    # Keep a private copy of the configuration for later lookups.
    self.cfg = cfg.clone()
    self.cpu_device = torch.device("cpu")
    self.device = torch.device(cfg.MODEL.DEVICE)

    # Model in inference mode on the configured device.
    self.model = build_detection_model(cfg)
    self.model.eval()
    self.model.to(self.device)

    # Restore the trained weights into the freshly built model.
    checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(cfg.MODEL.WEIGHT)

    # Depends on self.cfg, so it has to come after the clone above.
    self.transforms = self.build_transform()
def build_transform(self):
    """
    Assemble the preprocessing pipeline applied to every input image:
    to PIL, resize, to tensor, channel/range adjustment, normalization.
    """
    cfg = self.cfg

    # Images are loaded with OpenCV and therefore already in BGR order.
    # With TO_BGR255 set, only the range is rescaled by 255; otherwise the
    # channel order is reversed and the [0-1] range kept.
    if cfg.INPUT.TO_BGR255:
        channel_transform = T.Lambda(lambda x: x * 255)
    else:
        channel_transform = T.Lambda(lambda x: x[[2, 1, 0]])

    steps = [
        T.ToPILImage(),
        Resize(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST),
        T.ToTensor(),
        channel_transform,
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD),
    ]
    return T.Compose(steps)
def run_on_opencv_image(self, image):
    """
    Arguments:
        image (np.ndarray): an image as returned by OpenCV
    Returns:
        the result of ``compute_prediction(image)``, passed through unchanged
    """
    return self.compute_prediction(image)
def contour_to_valid(self, cnt, image_shape):
    """Flatten a contour to ``[x0, y0, x1, y1, ...]`` with points clamped to the image.

    Arguments:
        cnt (np.ndarray): contour of shape ``(N, 1, 2)`` holding ``(x, y)``
            points. The clamping is written through to ``cnt`` whenever the
            reshape below yields a view, so pass a copy to keep the original.
        image_shape (sequence): image size; the first two entries are read
            as height and width.
    Returns:
        np.ndarray: 1-D array of length ``2 * N``; x lies in ``[0, w - 1]``
        and y in ``[0, h - 1]``.
    Raises:
        ValueError: if ``cnt`` is not 3-dimensional.
    """
    # Explicit raise instead of `assert`, which disappears under `python -O`.
    if cnt.ndim != 3:
        raise ValueError(
            "expected a contour of shape (N, 1, 2), got shape %s" % (cnt.shape,)
        )
    points = cnt.reshape([cnt.shape[0], cnt.shape[2]])
    h, w = image_shape[0:2]
    # Vectorized clamp, in place like the original per-point loop.
    np.clip(points[:, 0], 0, w - 1, out=points[:, 0])
    np.clip(points[:, 1], 0, h - 1, out=points[:, 1])
    return np.reshape(points, -1)
def _nms_y(self, heat, kernel=3):
    """Zero every value of ``heat`` that is not the maximum of the
    ``1 x kernel`` window centred on it (window runs along the last axis)."""
    half = (kernel - 1) // 2
    window_max = nn.functional.max_pool2d(
        heat, kernel_size=(1, kernel), stride=1, padding=(0, half))
    # 1.0 where the value equals its window maximum, 0.0 elsewhere.
    is_peak = (window_max == heat).float()
    return heat * is_peak
def _nms_x(self, heat, kernel=3):
    """Zero every value of ``heat`` that is not the maximum of the
    ``kernel x 1`` window centred on it (window runs along the second-to-last axis)."""
    half = (kernel - 1) // 2
    window_max = nn.functional.max_pool2d(
        heat, kernel_size=(kernel, 1), stride=1, padding=(half, 0))
    # 1.0 where the value equals its window maximum, 0.0 elsewhere.
    is_peak = (window_max == heat).float()
    return heat * is_peak
def CTW_order_lr(self, map_in):
    """Order the two strongest points of each column into one closed outline.

    For every column of the 2-D ``map_in`` whose second-largest value exceeds
    0.1, the row indices of its two largest values are taken. The result
    lists ``[column, smaller_row]`` for those columns in increasing column
    order, followed by ``[column, larger_row]`` in decreasing column order.
    Returns ``[]`` when no column qualifies.
    """
    scores, rows = torch.topk(torch.tensor(map_in), 2, dim=0)
    scores = scores.numpy()
    # Sort the two row indices of each column so row 0 holds the smaller one.
    rows = np.sort(rows.numpy(), axis=0)

    columns = np.where(scores[1] > 0.1)[0]  # L
    if len(columns) == 0:
        return []

    forward = [[col, rows[0][col]] for col in columns]
    backward = [[col, rows[1][col]] for col in columns]
    return forward + backward[::-1]
def CTW_order_bt(self, map_in):
    """Order the two strongest points of each row into one closed outline.

    For every row of the 2-D ``map_in`` whose second-largest value exceeds
    0.1, the column indices of its two largest values are taken. The result
    lists ``[smaller_column, row]`` for those rows in decreasing row order,
    followed by ``[larger_column, row]`` in increasing row order.
    Returns ``[]`` when no row qualifies.
    """
    scores, cols = torch.topk(torch.tensor(map_in), 2, dim=1)
    scores = scores.numpy()
    # Sort the two column indices of each row so entry 0 is the smaller one.
    cols = np.sort(cols.numpy(), axis=1)

    rows = np.where(scores[:, 1] > 0.1)[0]  # H
    if len(rows) == 0:
        return []

    low_side = [[cols[row][0], row] for row in rows]
    high_side = [[cols[row][1], row] for row in rows]
    return low_side[::-1] + high_side
def boundary_to_mask_ic(self, bo_x, bo_y):
    """Rebuild a binary region mask from two boundary score maps.

    Both maps are non-maximum suppressed, thresholded at 0.5, and the
    positions that survive in both are kept. Those positions are turned
    into polygon(s) by ``getAlfaShapes`` and filled into a uint8 canvas.

    Arguments:
        bo_x, bo_y: score-map tensors. Only index 0 of the suppressed result
            is used -- assumes a leading channel axis of size 1 and a square
            map, TODO confirm against the caller.
    Returns:
        np.ndarray: uint8 canvas, 1 inside the region and 0 elsewhere. Its
        side length is the last dimension of the map.
    """
    # NMS Hmap and Vmap
    v_map = self._nms_x(bo_x, kernel=5)[0].clone().numpy()
    h_map = self._nms_y(bo_y, kernel=3)[0].clone().numpy()

    # Threshold Hmap and Vmap
    thresh = 0.5
    for score_map in (v_map, h_map):
        score_map[score_map < thresh] = 0
        score_map[score_map >= thresh] = 1

    # Output points with strong texture information in both maps
    both = v_map + h_map
    both[both < 2] = 0
    both[both == 2] = 1

    side = h_map.shape[-1]
    img_draw = np.zeros([side, side], dtype=np.uint8)
    if both.sum() == 0:
        return img_draw

    # calculate polygon by Alpha-Shape Algorithm
    inds = np.argwhere(both == 1)
    single_row = not np.any(inds[:, 0] != inds[0, 0])
    single_col = not np.any(inds[:, 1] != inds[0, 1])
    if single_row or single_col or inds.shape[0] < 4:
        # Collinear or too few points: fill them directly.
        cv2.fillPoly(img_draw, inds[np.newaxis, np.newaxis, :, :], 1)
        return img_draw

    lines = getAlfaShapes([tuple(ind) for ind in inds.tolist()], alfas=[1])
    try:
        draw_line = np.array(lines)
    except ValueError:
        # NumPy >= 1.24 raises for polygons of different lengths instead of
        # building an object array; fall through to the per-polygon branch.
        draw_line = None

    if draw_line is not None and draw_line.ndim == 4:
        if draw_line.shape[1] == 1:
            # Swap the two coordinates of every point before filling.
            draw_line[0, 0, :, :] = draw_line[0, 0, :, ::-1]
            cv2.fillPoly(img_draw, draw_line, 1)
        else:
            # NOTE(review): coordinates are not swapped in this branch,
            # unlike the other two -- kept as in the original, confirm intent.
            for draw_l in draw_line[0]:
                cv2.fillPoly(img_draw, np.int32(draw_l[np.newaxis, np.newaxis, :, :]), 1)
    else:
        for line in lines[0]:
            pts = np.array(line)[np.newaxis, np.newaxis, :, :]
            pts[0, 0, :, :] = pts[0, 0, :, ::-1]
            cv2.fillPoly(img_draw, pts, 1)
    return img_draw
def boundary_to_mask_ctw(self, bo_x, bo_y, p_temp_box):
    """Rebuild a binary region mask from two boundary score maps (CTW variant).

    One of the two maps is non-maximum suppressed, chosen by the box aspect:
    ``bo_x`` when the box is at least as wide as it is high, ``bo_y``
    otherwise. Positions where both maps reach 0.5 keep the sum of their
    scores; these are ordered into an outline by ``CTW_order_lr`` (wide
    boxes) or ``CTW_order_bt`` (tall boxes), filled, and closed
    morphologically.

    Arguments:
        bo_x, bo_y: score-map tensors. Only index 0 is used -- assumes a
            leading channel axis of size 1 and a square map, TODO confirm.
        p_temp_box: box as ``(x1, y1, x2, y2)``.
    Returns:
        np.ndarray: uint8 canvas with 1 inside the region. It is all zeros
        when nothing survives thresholding or the outline has 10 points or
        fewer.
    """
    thresh_total = 0.5
    box_w = p_temp_box[2] - p_temp_box[0]
    box_h = p_temp_box[3] - p_temp_box[1]
    horizontal = bool(box_w >= box_h)

    # point re-scoring: suppress non-maxima on the map matching the layout
    if horizontal:
        bo_x = self._nms_x(bo_x, kernel=9)
    else:
        bo_y = self._nms_y(bo_y, kernel=9)
    score_x = bo_x[0].clone().numpy()
    score_y = bo_y[0].clone().numpy()

    # Keep the summed score where both maps pass the threshold, 0 elsewhere.
    strong = (score_x >= thresh_total) & (score_y >= thresh_total)
    ploys_Alfa = (score_x + score_y) * strong

    # rebuild text region from contour points
    side = score_y.shape[-1]
    img_draw = np.zeros([side, side], dtype=np.uint8)
    if ploys_Alfa.sum() == 0:
        return img_draw

    order_points = self.CTW_order_lr if horizontal else self.CTW_order_bt
    lines = order_points(ploys_Alfa)
    if len(lines) <= 10:
        return img_draw

    draw_line = np.array(lines)[np.newaxis, np.newaxis, :, :]
    cv2.fillPoly(img_draw, draw_line, 1)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    return cv2.morphologyEx(img_draw, cv2.MORPH_CLOSE, kernel)
def contour_to_xys(self, cnt, image_shape):
    """Return the minimum-area rotated rectangle of a contour as eight coordinates.

    Arguments:
        cnt (np.ndarray): contour accepted by ``cv2.minAreaRect``.
        image_shape (sequence): image size; the first two entries are read
            as height and width and used to clamp the corners.
    Returns:
        np.ndarray: 1-D integer array ``[x0, y0, ..., x3, y3]`` with x in
        ``[0, w - 1]`` and y in ``[0, h - 1]``.
    """
    h, w = image_shape[0:2]
    rect = cv2.minAreaRect(cnt)
    # np.int0 was an alias of np.intp and no longer exists in NumPy 2.0.
    points = cv2.boxPoints(rect).astype(np.intp)
    points[:, 0] = np.clip(points[:, 0], 0, w - 1)
    points[:, 1] = np.clip(points[:, 1], 0, h - 1)
    return np.reshape(points, -1)
def mask_to_roRect(self, mask, img_shape):
    """Convert a mask into the rotated rectangle of its first contour.

    Arguments:
        mask: tensor whose index 0 along the first axis is the 2-D mask.
        img_shape (sequence): image size, forwarded to ``contour_to_xys``.
    Returns:
        np.ndarray: the eight rectangle coordinates from ``contour_to_xys``,
        or ``np.zeros((1, 8))`` when the mask has no contour.
    """
    e = mask[0, :, :]
    # OpenCV 3.x returns (image, contours, hierarchy), other versions return
    # (contours, hierarchy); the last two entries are the same in both.
    countours, hier = cv2.findContours(
        e.clone().numpy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE
    )[-2:]
    if len(countours) == 0:
        return np.zeros((1, 8))
    # Only the first contour found is used.
    return self.contour_to_xys(countours[0].copy(), img_shape)
def mask_to_contours(self, mask, img_shape):
    """Convert a mask into the flattened, clamped points of its first contour.

    Arguments:
        mask: tensor whose index 0 along the first axis is the 2-D mask.
        img_shape (sequence): image size, forwarded to ``contour_to_valid``.
    Returns:
        np.ndarray: flattened contour points from ``contour_to_valid``, or
        ``np.zeros((1, 8))`` when the mask has no contour.
    """
    e = mask[0, :, :]
    # OpenCV 3.x returns (image, contours, hierarchy), other versions return
    # (contours, hierarchy); the last two entries are the same in both.
    countours, hier = cv2.findContours(
        e.clone().numpy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE
    )[-2:]
    if len(countours) == 0:
        return np.zeros((1, 8))
    # Only the first contour found is used; it is copied because
    # contour_to_valid clamps in place.
    return self.contour_to_valid(countours[0].copy(), img_shape)
def py_cpu_pnms(self, dets, scores, thresh):
    """Polygon non-maximum suppression.

    Arguments:
        dets: iterable of polygons, each a sequence of ``(x, y)`` points.
        scores (np.ndarray): one score per polygon.
        thresh (float): a polygon is dropped when its IoU with an already
            kept, higher-scoring polygon exceeds this value.
    Returns:
        list: indices into ``dets`` of the kept polygons, best score first.
    """
    # Build each polygon once instead of once per pair.
    polys = [Polygon([[pt[0], pt[1]] for pt in det]) for det in dets]

    count = scores.shape[0]
    areas = np.zeros(scores.shape)
    inter_areas = np.zeros((count, count))
    for il, poly in enumerate(polys):
        areas[il] = poly.area
        for jl in range(il, len(polys)):
            try:
                inter = poly.intersection(polys[jl])
            except Exception:
                # The intersection can fail on invalid geometry. Report the
                # pair and count it as non-overlapping, rather than reusing
                # the area left over from a previous pair.
                print(poly, polys[jl])
                inter_area = 0.0
            else:
                inter_area = inter.area
            inter_areas[il][jl] = inter_area
            inter_areas[jl][il] = inter_area

    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        ovr = inter_areas[i][rest] / (areas[i] + areas[rest] - inter_areas[i][rest])
        # Continue with the candidates that do not overlap `i` too much.
        order = rest[np.where(ovr <= thresh)[0]]
    return keep
def esd_pnms(self, esd, pnms_thresh):
    """Run polygon NMS over detection records.

    Arguments:
        esd (list): records with a ``'score'`` and a ``'seg_rorect'`` entry;
            the latter holds eight numbers ``x0, y0, ..., x3, y3``.
        pnms_thresh (float): overlap threshold handed to ``py_cpu_pnms``.
    Returns:
        the list of kept indices returned by ``py_cpu_pnms``.
    """
    scores = []
    dets = []
    for record in esd:
        scores.append(record['score'])
        flat = record['seg_rorect']
        # Regroup the flat eight numbers into four (x, y) corner points.
        corners = [[flat[2 * k], flat[2 * k + 1]] for k in range(4)]
        dets.append(np.array(corners))
    return self.py_cpu_pnms(np.array(dets), np.array(scores), pnms_thresh)
def compute_prediction(self, original_image):
    """Run the detector on one image and turn its boundary maps into masks.

    Arguments:
        original_image (np.ndarray): image as returned by OpenCV; every axis
            but the last is read as ``(height, width)``.
    Returns:
        tuple ``(contours, masks, boxes)``:
            contours (list): one flattened contour per detection, from
                ``mask_to_contours``.
            masks (np.ndarray): boolean array, channel axis repeated three
                times and moved last -- presumably ``(N, height, width, 3)``,
                confirm against ``Masker``.
            boxes (list): ``xyxy`` boxes as nested lists.
        When nothing is detected, the same three-part shape is returned with
        empty contents.

    NOTE: ``self.confidence_threshold`` is not applied here; every detection
    returned by the model is kept.
    """
    # apply pre-processing to image
    image = self.transforms(original_image)
    # convert to an ImageList, padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    image_list = image_list.to(self.device)
    # compute predictions
    with torch.no_grad():
        output = self.model(image_list)
    prediction = [o.to(self.cpu_device) for o in output][0]

    # reshape prediction (a BoxList) into the original image size
    image_height, image_width = original_image.shape[:-1]
    prediction = prediction.resize((image_width, image_height))
    if len(prediction) == 0:
        # Return the same three-part result as the normal path, so callers
        # that unpack it do not fail on an image without detections.
        empty_masks = np.zeros((0, image_height, image_width, 3), dtype=np.bool_)
        return [], empty_masks, []

    prediction = prediction.convert("xyxy")
    boxes = prediction.bbox.tolist()
    masks_x = prediction.get_field("mask_x")
    masks_y = prediction.get_field("mask_y")
    masks = [self.boundary_to_mask_ctw(mask_x, mask_y, p_temp) for
             mask_x, mask_y, p_temp in zip(masks_x, masks_y, prediction.bbox)]
    masks = torch.from_numpy(np.array(masks)[:, np.newaxis, :, :])

    # Masker is necessary only if masks haven't been already resized.
    if list(masks.shape[-2:]) != [image_height, image_width]:
        masker = Masker(threshold=0.5, padding=1)
        masks = masker(masks.expand(1, -1, -1, -1, -1), prediction)
        masks = masks[0]

    contours = [self.mask_to_contours(mask, [image_height, image_width]) for mask in masks]
    bool_masks = np.array(masks.repeat(1, 3, 1, 1)).astype(np.bool_).transpose(0, 2, 3, 1)
    return contours, bool_masks, boxes
def process_char_mask(self, char_masks, boxes, threshold=192):
    """Decode one text string per character-mask entry.

    Arguments:
        char_masks (np.ndarray): 4-D array; the first axis indexes detections.
        boxes: per-detection box coordinates, converted to ``int`` before use.
        threshold: forwarded to ``getstr_grid``.
    Returns:
        tuple ``(texts, rec_scores)``: two lists with one entry per detection,
        taken from the first two values ``getstr_grid`` returns.
    """
    texts = []
    rec_scores = []
    for index in range(char_masks.shape[0]):
        int_box = [int(coord) for coord in boxes[index]]
        # getstr_grid receives a copy of this detection's mask.
        char_mask = char_masks[index, :, :, :].copy()
        word, word_score, _, _ = getstr_grid(char_mask, int_box, threshold=threshold)
        texts.append(word)
        rec_scores.append(word_score)
    return texts, rec_scores
def mask2polygon(self, mask, box, im_size, threshold=0.5, output_polygon=True):
    """Turn a box-local mask into polygon coordinates in image space.

    The mask is resized to the box, blurred, binarized and cleaned with a
    3x3 opening and closing. With ``output_polygon`` the largest contour is
    simplified into a polygon; otherwise a rotated rectangle is fitted to
    the foreground pixels via ``get_tight_rect``.

    Arguments:
        mask (np.ndarray): 2-D mask, scaled by 255 before resizing.
        box (sequence): integer ``(x1, y1, x2, y2)`` of the detection.
        im_size (sequence): image size as ``(height, width, ...)``.
        threshold (float): binarization level applied after blurring.
        output_polygon (bool): polygon (True) or rotated rectangle (False).
    Returns:
        list of int: flat ``[x0, y0, x1, y1, ...]``; ``None`` when no contour
        is found, the polygon has fewer than three points, or there is no
        foreground to fit a rectangle to.
    """
    # mask 32*128
    image_width, image_height = im_size[1], im_size[0]
    box_h = box[3] - box[1]
    box_w = box[2] - box[0]
    cls_polys = (mask * 255).astype(np.uint8)
    poly_map = np.array(Image.fromarray(cls_polys).resize((box_w, box_h)))
    poly_map = poly_map.astype(np.float32) / 255
    poly_map = cv2.GaussianBlur(poly_map, (3, 3), sigmaX=3)
    # Use the caller's threshold; this level used to be hard-coded to 0.5.
    _, poly_map = cv2.threshold(poly_map, threshold, 1, cv2.THRESH_BINARY)

    # Opening then closing with a 3x3 element, shared by both output modes.
    SE1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    poly_map = cv2.erode(poly_map, SE1)
    poly_map = cv2.dilate(poly_map, SE1)
    poly_map = cv2.morphologyEx(poly_map, cv2.MORPH_CLOSE, SE1)

    if output_polygon:
        # [-2] selects the contour list on both the three-value (OpenCV 3.x)
        # and the two-value return forms of findContours.
        contours = cv2.findContours(
            (poly_map * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE
        )[-2]
        if len(contours) == 0:
            print(contours)
            print(len(contours))
            return None
        max_cnt = max(contours, key=cv2.contourArea)
        epsilon = 0.01 * cv2.arcLength(max_cnt, True)
        approx = cv2.approxPolyDP(max_cnt, epsilon, True)
        # Shift from box-local to image coordinates.
        pts = approx.reshape((-1, 2))
        pts[:, 0] = pts[:, 0] + box[0]
        pts[:, 1] = pts[:, 1] + box[1]
        polygon = list(map(int, pts.reshape((-1,))))
        if len(polygon) < 6:
            return None
        return polygon

    idy, idx = np.where(poly_map == 1)
    xy = np.transpose(np.vstack((idx, idy)))
    if xy.shape[0] == 0:
        # No foreground pixels left: nothing to fit a rectangle to.
        return None
    hull = cv2.convexHull(xy, clockwise=True)
    if hull is None:
        return None
    # reverse order of points.
    hull = hull[::-1]
    # find minimum area bounding box.
    rect = cv2.minAreaRect(hull)
    corners = np.array(cv2.boxPoints(rect), dtype="int")
    pts = get_tight_rect(corners, box[0], box[1], image_height, image_width, 1)
    return [int(x * 1.0) for x in pts]
def visualization(self, image, polygons, masks, boxes):
    """Overlay detection masks and boxes on ``image``.

    Arguments:
        image (np.ndarray): image with at least three channels on the last
            axis; it is modified in place.
        polygons: accepted for interface compatibility; not drawn.
        masks: iterable of boolean arrays with the same shape as ``image``.
        boxes: iterable of ``(x1, y1, x2, y2)``; values are rounded to int.
    Returns:
        np.ndarray: the same ``image`` object, with masked pixels blended
        50/50 with ``(0, 255, 0)`` and each box outlined in ``(0, 0, 255)``.
    """
    green = np.ones(image.shape).astype(np.uint8)
    green[..., 0] = 0
    green[..., 1] = 255
    green[..., 2] = 0
    for mask in masks:
        image[mask] = image[mask] * 0.5 + green[mask] * 0.5
    for box in boxes:
        # cv2.rectangle needs integer points; boxes arrive as floats.
        x1, y1, x2, y2 = (int(round(v)) for v in box[:4])
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 5)
    return image
def main(args):
    """Run the detector on one image and write the visualization to disk.

    Arguments:
        args: namespace with ``config_file``, ``image_path`` and ``visu_path``.
    Raises:
        FileNotFoundError: if ``args.image_path`` cannot be read as an image.
    """
    # update the config options with the config file
    cfg.merge_from_file(args.config_file)
    # manual override some options
    # cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

    # The class defined in this file is DetDemo.
    det_demo = DetDemo(
        cfg,
        min_image_size=(1200, 2000),
        confidence_threshold=0.85,
        output_polygon=True
    )

    # load image and then run prediction
    image = cv2.imread(args.image_path)
    if image is None:
        # cv2.imread returns None instead of raising for a bad path.
        raise FileNotFoundError("cannot read image: %s" % args.image_path)

    result = det_demo.run_on_opencv_image(image)
    # Guard against a None result; in that case the image is written unannotated.
    if result is not None:
        result_polygons, result_masks, result_boxes = result
        image = det_demo.visualization(image, result_polygons, result_masks, result_boxes)
    cv2.imwrite(args.visu_path, image)
if __name__ == "__main__":
    # Command-line entry point: three optional string flags, then main().
    cli = argparse.ArgumentParser(description='parameters for demo')
    for flag, default in (
        ("--config-file", './configs/ctw/r50_baseline.yaml'),
        ("--image_path", './det_visual/1223.jpg'),
        ("--visu_path", './demo/example_results.jpg'),
    ):
        cli.add_argument(flag, type=str, default=default)
    args = cli.parse_args()
    main(args)