Spaces:
Sleeping
Sleeping
| ''' | |
| Copyright 2023 Vignesh(VK)Kotteeswaran <[email protected]> | |
| Licensed under the Apache License, Version 2.0 (the "License"); | |
| you may not use this file except in compliance with the License. | |
| You may obtain a copy of the License at | |
| http://www.apache.org/licenses/LICENSE-2.0 | |
| Unless required by applicable law or agreed to in writing, software | |
| distributed under the License is distributed on an "AS IS" BASIS, | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| See the License for the specific language governing permissions and | |
| limitations under the License. | |
| ''' | |
| import numpy as np | |
| from openvino.runtime import Core | |
| from utils import DBPostProcess,draw_det_res | |
| import cv2 | |
| from ocr_inference import OCR | |
class Text_Detection():
    """Text detector on OpenVINO whose detected regions are passed to an OCR model.

    The detector output is decoded by ``DBPostProcess`` (imported from
    ``utils``); every detected polygon is cut out of the source image and
    handed to ``OCR.predict``.
    """

    def __init__(self, model_path, ocr_model_path='models/ocr_fp16.xml',
                 device_name="CPU"):
        """Load and compile the detection model and create the OCR helper.

        Args:
            model_path: path of the detection model passed to ``Core.read_model``.
            ocr_model_path: model path handed to ``OCR``; defaults to the
                location that used to be hard-coded.
            device_name: OpenVINO device to compile for; defaults to ``"CPU"``.
        """
        ie = Core()
        print('\n', model_path)
        model = ie.read_model(model=model_path)
        self.compiled_model = ie.compile_model(model=model, device_name=device_name)
        self.ocr = OCR(ocr_model_path)
        self.input_layer = self.compiled_model.input(0)
        self.output_layer = self.compiled_model.output(0)
        # Not used inside this class; kept because code outside may read them.
        self.show_frame = None
        self.image_shape = None
        # Resize policy consumed by resize_norm_img().
        self.limit_side_len = 736
        self.limit_type = 'min'
        # Normalisation: pixel * scale, minus per-channel mean, divided by std.
        self.scale = 1. / 255.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.postprocess_detection = DBPostProcess()

    def img_decode(self, img):
        """Decode an encoded image buffer into a 3-channel array.

        Args:
            img: bytes-like object holding the encoded image.

        Returns:
            The result of ``cv2.imdecode(..., 1)``; callers must be prepared
            for ``None`` when the buffer cannot be decoded.
        """
        buffer = np.frombuffer(img, dtype='uint8')
        return cv2.imdecode(buffer, 1)

    def preprocess_img(self, img):
        """Prepare one image for the detector (delegates to resize_norm_img)."""
        return self.resize_norm_img(img)

    def _target_size(self, h, w):
        """Return ``(resize_h, resize_w)`` for an ``h`` x ``w`` image.

        The scale ratio depends on ``self.limit_type``:
          * ``'max'``: shrink so the longer side is at most ``limit_side_len``.
          * ``'min'``: enlarge so the shorter side is at least ``limit_side_len``.
          * ``'resize_long'``: scale the longer side to ``limit_side_len``.
        Both sides are then rounded to a multiple of 32, never below 32.

        Raises:
            ValueError: if ``self.limit_type`` is not one of the values above.
        """
        limit_side_len = self.limit_side_len
        if self.limit_type == 'max':
            longest = max(h, w)
            ratio = float(limit_side_len) / longest if longest > limit_side_len else 1.
        elif self.limit_type == 'min':
            shortest = min(h, w)
            ratio = float(limit_side_len) / shortest if shortest < limit_side_len else 1.
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise ValueError('unsupported limit_type: {!r}'.format(self.limit_type))

        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)
        return resize_h, resize_w

    def resize_norm_img(self, img,):
        """Resize an image to a multiple of 32 per side and normalise it.

        Args:
            img(array): array with shape [h, w, c].

        Returns:
            dict with
              * ``'img'``: normalised image transposed to [c, resize_h, resize_w];
              * ``'shape_list'``: ``[h, w, ratio_h, ratio_w]`` where the ratios
                are resized size divided by original size.

        Raises:
            ValueError: if the image has a zero-length side, if ``limit_type``
                is unsupported, or if ``cv2.resize`` rejects the image.
        """
        h, w, _ = img.shape
        if h <= 0 or w <= 0:
            # Would otherwise surface as a ZeroDivisionError further down.
            raise ValueError('cannot resize an empty image of shape {}'.format(img.shape))

        resize_h, resize_w = self._target_size(h, w)
        try:
            img = cv2.resize(img, (resize_w, resize_h))
        except cv2.error as err:
            # Continuing with the unresized image would make the returned
            # ratios disagree with the actual data, so fail loudly instead.
            raise ValueError(
                'cv2.resize failed for image of shape {} -> ({}, {})'.format(
                    img.shape, resize_w, resize_h)) from err

        img = (img.astype('float32') * self.scale - self.mean) / self.std
        img = img.transpose((2, 0, 1))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return {'img': img, 'shape_list': [h, w, ratio_h, ratio_w]}

    def _crop_polygon(self, image, box):
        """Cut the polygon ``box`` out of ``image``; pixels outside it are zeroed.

        Returns the crop limited to the polygon's bounding rectangle (clamped
        to the image), or ``None`` when that rectangle does not overlap the image.
        """
        pts = np.array(box).astype(np.int32).reshape((-1, 1, 2))
        x, y, w, h = cv2.boundingRect(pts)
        img_h, img_w = image.shape[:2]
        # Clamp: a negative origin would otherwise be read as a from-the-end index.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            return None

        # Mask only the bounding rectangle rather than the whole image; the
        # resulting pixels are the same as masking first and cropping after.
        roi = image[y0:y1, x0:x1]
        mask = np.zeros(roi.shape[:2], dtype=np.uint8)
        shifted = (pts - np.array([x0, y0], dtype=np.int32)).astype(np.int32)
        cv2.fillPoly(mask, [shifted], 255)
        return cv2.bitwise_and(roi, roi, mask=mask)

    def predict(self, src):
        """Detect text regions in every image of ``src`` and run OCR on them.

        Args:
            src: iterable whose items are either image arrays (anything with a
                ``shape`` attribute) or file paths given as ``str``.

        Returns:
            A newline-joined string starting with ``"Found texts:"`` followed
            by one OCR result per detected region, or the string
            ``"Error: Invalid Input"`` if an item is neither an array nor a str.

        Raises:
            ValueError: if a file path cannot be decoded as an image.
        """
        imgs = []
        src_imgs = []
        shape_list = []
        for item in src:
            if hasattr(item, 'shape'):
                image = item
            elif isinstance(item, str):
                with open(item, 'rb') as f:
                    image = self.img_decode(f.read())
                if image is None:
                    raise ValueError('could not decode image file: {!r}'.format(item))
            else:
                return "Error: Invalid Input"
            preprocessed_data = self.preprocess_img(image)
            src_imgs.append(image)
            imgs.append(np.expand_dims(preprocessed_data['img'], axis=0))
            shape_list.append(preprocessed_data['shape_list'])

        texts = ["Found texts:"]
        if not imgs:
            # Nothing to run; np.concatenate would raise on an empty list.
            return "\n".join(texts)

        # NOTE(review): all images go through the model as one batch, so they
        # must share the same resized shape or the concatenation fails.
        blob = np.concatenate(imgs, axis=0).astype(np.float32)
        outputs = self.compiled_model([blob])[self.output_layer]
        outputs = self.postprocess_detection(outputs, shape_list)

        for i, src_img in enumerate(src_imgs):
            dt_boxes = outputs[0][i]['points']
            dt_scores = outputs[1][i]['scores']
            for _score, box in zip(dt_scores, dt_boxes):
                cropped = self._crop_polygon(src_img, box)
                if cropped is None:
                    continue
                print('cropped:', cropped.shape)
                # Assumes OCR.predict returns a str, as required by the join
                # below -- TODO confirm against ocr_inference.
                texts.append(self.ocr.predict([cropped]))
        return "\n".join(texts)