Spaces:
Sleeping
Sleeping
| ''' | |
| Copyright 2023 Vignesh(VK)Kotteeswaran <[email protected]> | |
| Licensed under the Apache License, Version 2.0 (the "License"); | |
| you may not use this file except in compliance with the License. | |
| You may obtain a copy of the License at | |
| http://www.apache.org/licenses/LICENSE-2.0 | |
| Unless required by applicable law or agreed to in writing, software | |
| distributed under the License is distributed on an "AS IS" BASIS, | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| See the License for the specific language governing permissions and | |
| limitations under the License. | |
| ''' | |
| import numpy as np | |
| from openvino.runtime import Core | |
| import math | |
| import cv2 | |
| from utils import CTCLabelDecode | |
class OCR():
    """Text recogniser that runs an OpenVINO-compiled model on the CPU.

    Images are converted to 3-channel grayscale, resized to a height of 48
    pixels, normalised to the range [-1, 1], batched, and passed through the
    compiled model.  The raw output is turned into text by ``CTCLabelDecode``.

    Attributes:
        compiled_model: The model compiled for the ``"CPU"`` device.
        input_layer: First input port of the compiled model.
        output_layer: First output port of the compiled model.
        decoder: ``CTCLabelDecode`` instance built from ``'dict.txt'``.
        show_frame: The most recent resized (uint8, un-normalised) image
            produced by :meth:`resize_norm_img`, or ``None``.
        image_shape: ``[channels, height, width]`` chosen for the most
            recent image, or ``None``.
        dynamic_width: Initialised to ``False``; not read inside this class.
    """

    def __init__(self, model_path):
        """Load and compile the model found at ``model_path``.

        Args:
            model_path: Path handed to ``Core.read_model``.
        """
        ie = Core()
        print('\n', model_path)
        model = ie.read_model(model=model_path)
        self.compiled_model = ie.compile_model(model=model, device_name="CPU")
        self.input_layer = self.compiled_model.input(0)
        self.output_layer = self.compiled_model.output(0)
        # NOTE(review): the arguments are presumably the character dictionary
        # path and a "use space character" flag -- confirm in utils.
        self.decoder = CTCLabelDecode('dict.txt', True)
        self.show_frame = None
        self.image_shape = None
        self.dynamic_width = False

    def img_decode(self, img):
        """Decode an encoded image buffer into a 3-channel colour array.

        Args:
            img: Bytes-like object holding an encoded image (PNG, JPEG, ...).

        Returns:
            The array returned by ``cv2.imdecode`` with flag ``1`` (colour).
            OpenCV returns ``None`` when the buffer cannot be decoded.
        """
        encoded = np.frombuffer(img, dtype='uint8')
        return cv2.imdecode(encoded, 1)

    def preprocess_img(self, img):
        """Convert a BGR image to replicated grayscale and normalise it.

        Args:
            img: Colour image array accepted by ``cv2.COLOR_BGR2GRAY``.

        Returns:
            The ``float32`` array produced by :meth:`resize_norm_img`.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Replicate the single gray channel three times so the result has
        # three channels, stored as uint8.
        stacked = np.repeat(gray[:, :, np.newaxis], 3, axis=2)
        stacked = stacked.astype(np.uint8, copy=False)
        return self.resize_norm_img(stacked)

    def resize_norm_img(self, img,
                        padding=True,
                        interpolation=cv2.INTER_LINEAR):
        """Resize ``img`` to height 48 and normalise it to [-1, 1].

        The output canvas is twice as wide as the input image.  With
        ``padding=True`` the image keeps its aspect ratio (capped at the
        canvas width) and the remaining columns on the right stay zero; with
        ``padding=False`` the image is stretched to fill the whole canvas.

        Side effects: sets ``self.image_shape`` to ``[3, 48, 2 * width]`` and
        ``self.show_frame`` to the resized, un-normalised image.

        Args:
            img: Image array of shape ``(height, width, 3)``.
            padding: Keep the aspect ratio and zero-pad on the right when
                true; stretch to the full canvas width otherwise.
            interpolation: OpenCV interpolation flag used for the resize.

        Returns:
            ``float32`` array of shape ``(3, 48, 2 * width)``.
        """
        src_h, src_w = img.shape[0], img.shape[1]
        self.image_shape = [3, 48, int(src_w * 2)]
        channels, target_h, target_w = self.image_shape

        if padding:
            aspect = src_w * 1.0 / src_h
            # Never let the resized image exceed the canvas width.
            resized_w = min(target_w, int(math.ceil(target_h * aspect)))
        else:
            resized_w = target_w

        resized = cv2.resize(img, (resized_w, target_h),
                             interpolation=interpolation)
        self.show_frame = resized

        # HWC -> CHW, then map [0, 255] to [-1, 1].
        normalised = resized.astype('float32').transpose((2, 0, 1)) / 255
        normalised -= 0.5
        normalised /= 0.5

        # Zero in normalised space corresponds to mid-gray in the input.
        canvas = np.zeros((channels, target_h, target_w), dtype=np.float32)
        canvas[:, :, 0:resized_w] = normalised
        return canvas

    @staticmethod
    def _stack_batch(blobs):
        """Stack CHW arrays into one NCHW batch, zero-padding narrower ones."""
        widest = max(blob.shape[2] for blob in blobs)
        batch_shape = (len(blobs),) + tuple(blobs[0].shape[:2]) + (widest,)
        batch = np.zeros(batch_shape, dtype=np.float32)
        for index, blob in enumerate(blobs):
            batch[index, :, :, :blob.shape[2]] = blob
        return batch

    def predict(self, src, *, return_all=False):
        """Recognise the text in one or more images.

        Args:
            src: Iterable whose items are either image arrays (anything with
                a ``shape`` attribute) or string paths to image files.
            return_all: When true, return the decoded text of every item as a
                list.  The default returns only the first item's text, which
                is what this method has always returned.

        Returns:
            The decoded text of the first item, or a list with one text per
            item when ``return_all`` is true.  If an item is neither an array
            nor a string the legacy value ``"Error: Invalid Input"`` is
            returned instead.

        Raises:
            ValueError: If ``src`` is empty or an image file cannot be
                decoded.
        """
        blobs = []
        for item in src:
            if hasattr(item, 'shape'):
                image = item
            elif isinstance(item, str):
                with open(item, 'rb') as f:
                    image = self.img_decode(f.read())
                if image is None:
                    raise ValueError(
                        "Could not decode image file: {!r}".format(item))
            else:
                # Kept for backward compatibility with existing callers.
                return "Error: Invalid Input"
            blobs.append(self.preprocess_img(image))

        if not blobs:
            raise ValueError(
                "predict() needs at least one image or image path")

        # Widths depend on each input image, so pad to the widest item
        # before batching; a single image is passed through unchanged.
        batch = self._stack_batch(blobs)
        outputs = self.compiled_model([batch])[self.output_layer]

        # Only decode what will actually be returned.
        wanted = outputs if return_all else outputs[:1]
        # The decoder is given one sample at a time with a leading batch
        # axis; [0][0] picks the first entry of its first result.
        # NOTE(review): presumably the result is a list of (text, score)
        # pairs -- confirm in utils.CTCLabelDecode.
        texts = [self.decoder(np.expand_dims(output, axis=0))[0][0]
                 for output in wanted]
        return texts if return_all else texts[0]