Update rapidocr_onnxruntime
- resources/fonts/FZYTK.TTF → FZYTK.TTF +0 -0
- app.py +19 -19
- config.yaml +0 -72
- rapidocr_onnxruntime/__init__.py +0 -4
- rapidocr_onnxruntime/ch_ppocr_v2_cls/__init__.py +0 -4
- rapidocr_onnxruntime/ch_ppocr_v2_cls/config.yaml +0 -14
- rapidocr_onnxruntime/ch_ppocr_v2_cls/text_cls.py +0 -117
- rapidocr_onnxruntime/ch_ppocr_v2_cls/utils.py +0 -80
- rapidocr_onnxruntime/ch_ppocr_v3_det/__init__.py +0 -4
- rapidocr_onnxruntime/ch_ppocr_v3_det/config.yaml +0 -29
- rapidocr_onnxruntime/ch_ppocr_v3_det/text_detect.py +0 -127
- rapidocr_onnxruntime/ch_ppocr_v3_det/utils.py +0 -452
- rapidocr_onnxruntime/ch_ppocr_v3_rec/__init__.py +0 -4
- rapidocr_onnxruntime/ch_ppocr_v3_rec/config.yaml +0 -12
- rapidocr_onnxruntime/ch_ppocr_v3_rec/text_recognize.py +0 -120
- rapidocr_onnxruntime/ch_ppocr_v3_rec/utils.py +0 -128
- rapidocr_onnxruntime/rapid_ocr_api.py +0 -176
- requirements.txt +1 -8
- resources/fonts/.gitkeep +0 -0
- resources/models/.gitkeep +0 -0
- resources/models/ch_PP-OCRv3_det_infer.onnx +0 -3
- resources/models/ch_PP-OCRv3_rec_infer.onnx +0 -3
- resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx +0 -3
resources/fonts/FZYTK.TTF → FZYTK.TTF
RENAMED
File without changes
app.py
CHANGED
@@ -5,24 +5,18 @@ os.system('pip install -r requirements.txt')
 
 import math
 import random
-from pathlib import Path
 import time
+from pathlib import Path
 
 import cv2
 import gradio as gr
-from rapidocr_onnxruntime import TextSystem
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
-
-text_sys = TextSystem('config.yaml')
+from rapidocr_onnxruntime import RapidOCR
 
 
 def draw_ocr_box_txt(image, boxes, txts, font_path,
                      scores=None, text_score=0.5):
-    if not Path(font_path).exists():
-        raise FileNotFoundError(f'The {font_path} does not exists! \n'
-                                f'Please download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing')
-
     h, w = image.height, image.width
     img_left = image.copy()
     img_right = Image.new('RGB', (w, h), (255, 255, 255))
@@ -31,12 +25,14 @@ def draw_ocr_box_txt(image, boxes, txts, font_path,
     draw_left = ImageDraw.Draw(img_left)
     draw_right = ImageDraw.Draw(img_right)
     for idx, (box, txt) in enumerate(zip(boxes, txts)):
-        if scores is not None and scores[idx] < text_score:
+        if scores is not None and float(scores[idx]) < text_score:
             continue
 
         color = (random.randint(0, 255),
                  random.randint(0, 255),
                  random.randint(0, 255))
+
+        box = [tuple(v) for v in box]
         draw_left.polygon(box, fill=color)
         draw_right.polygon([box[0][0], box[0][1],
                             box[1][0], box[1][1],
@@ -73,10 +69,9 @@ def draw_ocr_box_txt(image, boxes, txts, font_path,
     return np.array(img_show)
 
 
-def visualize(image_path, boxes, rec_res, font_path="resources/fonts/FZYTK.TTF"):
+def visualize(image_path, boxes, txts, scores,
+              font_path="./FZYTK.TTF"):
     image = Image.open(image_path)
-    txts = [rec_res[i][0] for i in range(len(rec_res))]
-    scores = [rec_res[i][1] for i in range(len(rec_res))]
 
     draw_img = draw_ocr_box_txt(image, boxes,
                                 txts, font_path,
@@ -96,18 +91,23 @@ def visualize(image_path, boxes, rec_res, font_path="resources/fonts/FZYTK.TTF")
 def inference(img, box_thresh, unclip_ratio, text_score):
     img_path = img.name
     img = cv2.imread(img_path)
-
-
-
-
-    img_save_path = visualize(img_path, dt_boxes, rec_res)
-
+    ocr_result, _ = rapid_ocr(img, box_thresh=box_thresh,
+                              unclip_ratio=unclip_ratio,
+                              text_score=text_score)
+    dt_boxes, rec_res, scores = list(zip(*ocr_result))
+    img_save_path = visualize(img_path, dt_boxes, rec_res, scores)
+    output_text = [f'{one_rec} {float(score):.4f}'
+                   for one_rec, score in zip(rec_res, scores)]
+    return img_save_path, output_text
 
 
-title = '
+title = 'RapidOCR Demo (捷智OCR)'
 description = 'Gradio demo for RapidOCR. Github Repo: https://github.com/RapidAI/RapidOCR'
 article = "<p style='text-align: center'> Completely open source, free and support offline deployment of multi-platform and multi-language OCR SDK <a href='https://github.com/RapidAI/RapidOCR'>Github Repo</a></p>"
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
+
+rapid_ocr = RapidOCR()
+
 gr.Interface(
     inference,
     inputs=[
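
The functional core of this change is the migration from the vendored TextSystem('config.yaml') pipeline to the RapidOCR class imported from the pip-installed package. A minimal standalone sketch of the new call pattern, assuming only the keyword arguments and the (box, text, score) result rows visible in this diff; 'test.jpg' is a placeholder path:

import cv2
from rapidocr_onnxruntime import RapidOCR

engine = RapidOCR()  # the packaged engine bundles its own det/cls/rec models

img = cv2.imread('test.jpg')
result, _ = engine(img, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5)
# Each row of result is (box, text, score), which is why inference() above
# can unpack it with zip(*ocr_result).
if result:
    boxes, txts, scores = list(zip(*result))
    for txt, score in zip(txts, scores):
        print(f'{txt} {float(score):.4f}')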
config.yaml
DELETED
@@ -1,72 +0,0 @@
Global:
    text_score: 0.5
    use_angle_cls: true
    print_verbose: true
    min_height: 30
    width_height_ratio: 8

Det:
    module_name: ch_ppocr_v3_det
    class_name: TextDetector
    model_path: resources/models/ch_PP-OCRv3_det_infer.onnx

    use_cuda: false
    # Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
    CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true

    pre_process:
        DetResizeForTest:
            limit_side_len: 736
            limit_type: min
        NormalizeImage:
            std: [0.229, 0.224, 0.225]
            mean: [0.485, 0.456, 0.406]
            scale: 1./255.
            order: hwc
        ToCHWImage:
        KeepKeys:
            keep_keys: ['image', 'shape']

    post_process:
        thresh: 0.3
        box_thresh: 0.5
        max_candidates: 1000
        unclip_ratio: 1.6
        use_dilation: true
        score_mode: fast

Cls:
    module_name: ch_ppocr_v2_cls
    class_name: TextClassifier
    model_path: resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx

    use_cuda: false
    CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true

    cls_image_shape: [3, 48, 192]
    cls_batch_num: 6
    cls_thresh: 0.9
    label_list: ['0', '180']

Rec:
    module_name: ch_ppocr_v3_rec
    class_name: TextRecognizer
    model_path: resources/models/ch_PP-OCRv3_rec_infer.onnx

    use_cuda: false
    CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true

    rec_img_shape: [3, 48, 320]
    rec_batch_num: 6
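
The deleted top-level config wired together the three pipeline stages (Det → Cls → Rec) plus global thresholds; after this commit the packaged RapidOCR presumably ships equivalent defaults internally. For reference, the vendored modules read it with plain PyYAML (the read_yaml helper repeated in each deleted utils.py); a minimal sketch of how the nested keys were consumed:

import yaml

# Same pattern as the deleted read_yaml() helper.
with open('config.yaml', 'rb') as f:
    config = yaml.load(f, Loader=yaml.Loader)

# The global knobs and per-stage sections are plain nested dicts.
print(config['Global']['text_score'])               # 0.5
print(config['Det']['post_process']['box_thresh'])  # 0.5
print(config['Rec']['rec_img_shape'])               # [3, 48, 320]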
rapidocr_onnxruntime/__init__.py
DELETED
@@ -1,4 +0,0 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .rapid_ocr_api import TextSystem
rapidocr_onnxruntime/ch_ppocr_v2_cls/__init__.py
DELETED
@@ -1,4 +0,0 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .text_cls import TextClassifier
rapidocr_onnxruntime/ch_ppocr_v2_cls/config.yaml
DELETED
@@ -1,14 +0,0 @@
model_path: resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx

use_cuda: false
# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true

cls_image_shape: [3, 48, 192]
cls_batch_num: 6
cls_thresh: 0.9
label_list: ['0', '180']
rapidocr_onnxruntime/ch_ppocr_v2_cls/text_cls.py
DELETED
@@ -1,117 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import copy
import math
import time
from typing import List

import cv2
import numpy as np

try:
    from .utils import ClsPostProcess, read_yaml, OrtInferSession
except:
    from utils import ClsPostProcess, read_yaml, OrtInferSession


class TextClassifier(object):
    def __init__(self, config):
        self.cls_image_shape = config['cls_image_shape']
        self.cls_batch_num = config['cls_batch_num']
        self.cls_thresh = config['cls_thresh']
        self.postprocess_op = ClsPostProcess(config['label_list'])

        session_instance = OrtInferSession(config)
        self.session = session_instance.session
        self.input_name = session_instance.get_input_name()

    def __call__(self, img_list: List[np.ndarray]):
        if isinstance(img_list, np.ndarray):
            img_list = [img_list]

        img_list = copy.deepcopy(img_list)

        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]

        # Sorting can speed up the cls process
        indices = np.argsort(np.array(width_list))

        img_num = len(img_list)
        cls_res = [['', 0.0]] * img_num
        batch_num = self.cls_batch_num
        elapse = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)

            norm_img_batch = []
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]])
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)

            starttime = time.time()
            onnx_inputs = {self.input_name: norm_img_batch}
            prob_out = self.session.run(None, onnx_inputs)[0]
            cls_result = self.postprocess_op(prob_out)
            elapse += time.time() - starttime

            for rno in range(len(cls_result)):
                label, score = cls_result[rno]
                cls_res[indices[beg_img_no + rno]] = [label, score]
                if '180' in label and score > self.cls_thresh:
                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
                        img_list[indices[beg_img_no + rno]], 1)
        return img_list, cls_res, elapse

    def resize_norm_img(self, img):
        img_c, img_h, img_w = self.cls_image_shape
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(img_h * ratio) > img_w:
            resized_w = img_w
        else:
            resized_w = int(math.ceil(img_h * ratio))

        resized_image = cv2.resize(img, (resized_w, img_h))
        resized_image = resized_image.astype('float32')
        if img_c == 1:
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            resized_image = resized_image.transpose((2, 0, 1)) / 255

        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
        padding_im[:, :, :resized_w] = resized_image
        return padding_im


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', type=str, help='image_dir|image_path')
    parser.add_argument('--config_path', type=str, default='config.yaml')
    args = parser.parse_args()

    config = read_yaml(args.config_path)

    text_classifier = TextClassifier(config)

    img = cv2.imread(args.image_path)
    img_list, cls_res, predict_time = text_classifier(img)
    for ino in range(len(img_list)):
        print(f"cls result:{cls_res[ino]}")
rapidocr_onnxruntime/ch_ppocr_v2_cls/utils.py
DELETED
@@ -1,80 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

import yaml
from onnxruntime import (get_available_providers, get_device,
                         SessionOptions, InferenceSession,
                         GraphOptimizationLevel)


class OrtInferSession(object):
    def __init__(self, config):
        sess_opt = SessionOptions()
        sess_opt.log_severity_level = 4
        sess_opt.enable_cpu_mem_arena = False
        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        cuda_ep = 'CUDAExecutionProvider'
        cpu_ep = 'CPUExecutionProvider'
        cpu_provider_options = {
            "arena_extend_strategy": "kSameAsRequested",
        }

        EP_list = []
        if config['use_cuda'] and get_device() == 'GPU' \
                and cuda_ep in get_available_providers():
            EP_list = [(cuda_ep, config[cuda_ep])]
        EP_list.append((cpu_ep, cpu_provider_options))

        self.session = InferenceSession(config['model_path'],
                                        sess_options=sess_opt,
                                        providers=EP_list)

        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
            warnings.warn(f'{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
                          'you can check their relations from the offical web site: '
                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
                          RuntimeWarning)

    def get_input_name(self, input_idx=0):
        return self.session.get_inputs()[input_idx].name

    def get_output_name(self, output_idx=0):
        return self.session.get_outputs()[output_idx].name


def read_yaml(yaml_path):
    with open(yaml_path, 'rb') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    return data


class ClsPostProcess(object):
    """ Convert between text-label and text-index """

    def __init__(self, label_list):
        super(ClsPostProcess, self).__init__()
        self.label_list = label_list

    def __call__(self, preds, label=None):
        pred_idxs = preds.argmax(axis=1)
        decode_out = [(self.label_list[idx], preds[i, idx])
                      for i, idx in enumerate(pred_idxs)]
        if label is None:
            return decode_out

        label = [(self.label_list[idx], 1.0) for idx in label]
        return decode_out, label
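
ClsPostProcess above is just an argmax over the orientation logits mapped through label_list, and TextClassifier rotates any crop whose '180' score beats cls_thresh (0.9 in the deleted configs). A tiny self-contained sketch of the decode step, with the class logic inlined and made-up probabilities:

import numpy as np

label_list = ['0', '180']
preds = np.array([[0.9, 0.1],    # confident the crop is upright
                  [0.2, 0.8]])   # likely rotated by 180 degrees
pred_idxs = preds.argmax(axis=1)
decode_out = [(label_list[idx], preds[i, idx])
              for i, idx in enumerate(pred_idxs)]
print(decode_out)  # [('0', 0.9), ('180', 0.8)]
# Only the second crop would be cv2.rotate()d, and only if its score
# also exceeded cls_thresh.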
rapidocr_onnxruntime/ch_ppocr_v3_det/__init__.py
DELETED
@@ -1,4 +0,0 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .text_detect import TextDetector
rapidocr_onnxruntime/ch_ppocr_v3_det/config.yaml
DELETED
@@ -1,29 +0,0 @@
model_path: resources/models/ch_PP-OCRv3_det_infer.onnx

use_cuda: false
CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true

pre_process:
    DetResizeForTest:
        limit_side_len: 736
        limit_type: min
    NormalizeImage:
        std: [0.229, 0.224, 0.225]
        mean: [0.485, 0.456, 0.406]
        scale: 1./255.
        order: hwc
    ToCHWImage:
    KeepKeys:
        keep_keys: ['image', 'shape']

post_process:
    thresh: 0.3
    box_thresh: 0.5
    max_candidates: 1000
    unclip_ratio: 1.6
    use_dilation: true
    score_mode: "fast"
rapidocr_onnxruntime/ch_ppocr_v3_det/text_detect.py
DELETED
@@ -1,127 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import argparse
import time

import cv2
import numpy as np

try:
    from .utils import (DBPostProcess, create_operators,
                        transform, read_yaml, OrtInferSession)
except:
    from utils import (DBPostProcess, create_operators,
                       transform, read_yaml, OrtInferSession)


class TextDetector(object):
    def __init__(self, config):
        self.preprocess_op = create_operators(config['pre_process'])
        self.postprocess_op = DBPostProcess(**config['post_process'])

        session_instance = OrtInferSession(config)
        self.session = session_instance.session
        self.input_name = session_instance.get_input_name()

    def __call__(self, img):
        if img is None:
            raise ValueError('img is None')

        ori_im_shape = img.shape[:2]

        data = {'image': img}
        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
            return None, 0

        img = np.expand_dims(img, axis=0).astype(np.float32)
        shape_list = np.expand_dims(shape_list, axis=0)

        starttime = time.time()
        preds = self.session.run(None, {self.input_name: img})

        post_result = self.postprocess_op(preds[0], shape_list)

        dt_boxes = post_result[0]['points']
        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse

    def order_points_clockwise(self, pts):
        """
        reference from:
        https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
        sort the points based on their x-coordinates
        """
        xSorted = pts[np.argsort(pts[:, 0]), :]

        # grab the left-most and right-most points from the sorted
        # x-roodinate points
        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        # now, sort the left-most coordinates according to their
        # y-coordinates so we can grab the top-left and bottom-left
        # points, respectively
        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
        (tr, br) = rightMost

        rect = np.array([tl, tr, br, bl], dtype="float32")
        return rect

    def clip_det_res(self, points, img_height, img_width):
        for pno in range(points.shape[0]):
            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            box = self.clip_det_res(box, img_height, img_width)
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 3 or rect_height <= 3:
                continue
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='config.yaml')
    parser.add_argument('--image_path', type=str, default=None)
    args = parser.parse_args()

    config = read_yaml(args.config_path)

    text_detector = TextDetector(config)

    img = cv2.imread(args.image_path)
    dt_boxes, elapse = text_detector(img)

    from utils import draw_text_det_res
    src_im = draw_text_det_res(dt_boxes, args.image_path)
    cv2.imwrite('det_results.jpg', src_im)
    print('The det_results.jpg has been saved in the current directory.')
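
The geometry in order_points_clockwise deserves a worked example: sort the four corners by x, split into the two left-most and two right-most, then sort each pair by y to recover (top-left, bottom-left) and (top-right, bottom-right). A self-contained sketch with a shuffled axis-aligned box (made-up coordinates):

import numpy as np

pts = np.array([[10., 10.],   # bottom-right
                [0., 0.],     # top-left
                [10., 0.],    # top-right
                [0., 10.]])   # bottom-left

x_sorted = pts[np.argsort(pts[:, 0]), :]
left, right = x_sorted[:2, :], x_sorted[2:, :]
tl, bl = left[np.argsort(left[:, 1]), :]
tr, br = right[np.argsort(right[:, 1]), :]
print(np.array([tl, tr, br, bl], dtype='float32'))
# [[ 0.  0.] [10.  0.] [10. 10.] [ 0. 10.]] -- tl, tr, br, bl order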
rapidocr_onnxruntime/ch_ppocr_v3_det/utils.py
DELETED
@@ -1,452 +0,0 @@
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import sys
import warnings

import cv2
import numpy as np
import pyclipper
import six
import yaml
from shapely.geometry import Polygon
from onnxruntime import (get_available_providers, get_device,
                         SessionOptions, InferenceSession,
                         GraphOptimizationLevel)


class OrtInferSession(object):
    def __init__(self, config):
        sess_opt = SessionOptions()
        sess_opt.log_severity_level = 4
        sess_opt.enable_cpu_mem_arena = False
        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        cuda_ep = 'CUDAExecutionProvider'
        cpu_ep = 'CPUExecutionProvider'
        cpu_provider_options = {
            "arena_extend_strategy": "kSameAsRequested",
        }

        EP_list = []
        if config['use_cuda'] and get_device() == 'GPU' \
                and cuda_ep in get_available_providers():
            EP_list = [(cuda_ep, config[cuda_ep])]
        EP_list.append((cpu_ep, cpu_provider_options))

        self.session = InferenceSession(config['model_path'],
                                        sess_options=sess_opt,
                                        providers=EP_list)

        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
            warnings.warn(f'{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
                          'you can check their relations from the offical web site: '
                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
                          RuntimeWarning)

    def get_input_name(self, input_idx=0):
        return self.session.get_inputs()[input_idx].name

    def get_output_name(self, output_idx=0):
        return self.session.get_outputs()[output_idx].name


def read_yaml(yaml_path):
    with open(yaml_path, 'rb') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    return data


class DecodeImage(object):
    """ decode image """

    def __init__(self, img_mode='RGB', channel_first=False):
        self.img_mode = img_mode
        self.channel_first = channel_first

    def __call__(self, data):
        img = data['image']
        if six.PY2:
            assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage"
        else:
            assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage"

        img = np.frombuffer(img, dtype='uint8')
        img = cv2.imdecode(img, 1)
        if img is None:
            return None

        if self.img_mode == 'GRAY':
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, f'invalid shape of image[{img.shape}]'
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))
        data['image'] = img
        return data


class NormalizeImage(object):
    """ normalize image such as substract mean, divide std"""

    def __init__(self, scale=None, mean=None, std=None, order='chw'):
        if isinstance(scale, str):
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        img = np.array(data['image']).astype(np.float32)
        data['image'] = (img * self.scale - self.mean) / self.std
        return data


class ToCHWImage(object):
    """ convert hwc image to chw image"""
    def __init__(self):
        pass

    def __call__(self, data):
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data


class KeepKeys(object):
    def __init__(self, keep_keys):
        self.keep_keys = keep_keys

    def __call__(self, data):
        data_list = []
        for key in self.keep_keys:
            data_list.append(data[key])
        return data_list


class DetResizeForTest(object):
    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs.get('limit_side_len', 736)
            self.limit_type = kwargs.get('limit_type', 'min')

        if 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = kwargs.get('limit_side_len', 736)
            self.limit_type = kwargs.get('limit_type', 'min')

    def __call__(self, data):
        img = data['image']
        src_h, src_w = img.shape[:2]

        if self.resize_type == 0:
            # img, shape = self.resize_image_type0(img)
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            # img, shape = self.resize_image_type1(img)
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def resize_image_type1(self, img):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        # return img, np.array([ori_h, ori_w])
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
        h, w = img.shape[:2]

        # limit the max side
        if self.limit_type == 'max':
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        else:
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        resize_h = int(round(resize_h / 32) * 32)
        resize_w = int(round(resize_w / 32) * 32)

        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        h, w = img.shape[:2]

        resize_w = w
        resize_h = h

        # Fix the longer side
        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]


def transform(data, ops=None):
    """ transform """
    if ops is None:
        ops = []

    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data


def create_operators(op_param_dict):
    """
    create operators based on the config
    """
    ops = []
    for op_name, param in op_param_dict.items():
        if param is None:
            param = {}
        op = eval(op_name)(**param)
        ops.append(op)
    return ops


def draw_text_det_res(dt_boxes, img_path):
    src_im = cv2.imread(img_path)
    for box in dt_boxes:
        box = np.array(box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(src_im, [box], True,
                      color=(255, 255, 0), thickness=2)
    return src_im


class DBPostProcess(object):
    """The post process for Differentiable Binarization (DB)."""

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 score_mode="fast",
                 use_dilation=False):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode

        if use_dilation:
            self.dilation_kernel = np.array([[1, 1], [1, 1]])
        else:
            self.dilation_kernel = None

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        _bitmap: single map with shape (1, H, W),
            whose values are binarized as {0, 1}
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box):
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: use polyon mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, pred, shape_list):
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                   src_w, src_h)

            boxes_batch.append({'points': boxes})
        return boxes_batch
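
The unclip step expands each shrunken DB box by the perimeter-normalized distance d = area * unclip_ratio / perimeter before get_mini_boxes re-fits a rectangle. Worked through with the deleted config's unclip_ratio of 1.6: a 100×20 px box has area 2000 and perimeter 240, so d = 2000 * 1.6 / 240 ≈ 13.3 px. A sketch of the same computation using the Shapely and pyclipper calls the deleted code makes (coordinates are made up; the exact expanded outline depends on pyclipper's integer rounding):

import numpy as np
import pyclipper
from shapely.geometry import Polygon

box = [[0, 0], [100, 0], [100, 20], [0, 20]]
poly = Polygon(box)
distance = poly.area * 1.6 / poly.length   # 2000 * 1.6 / 240 ≈ 13.33

offset = pyclipper.PyclipperOffset()
offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
expanded = np.array(offset.Execute(distance)[0])
# The straight edges move outward by ~13 px and round joins fill the corners,
# so the expanded outline spans roughly (-13, -13) to (113, 33).
print(expanded.min(axis=0), expanded.max(axis=0))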
rapidocr_onnxruntime/ch_ppocr_v3_rec/__init__.py
DELETED
@@ -1,4 +0,0 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .text_recognize import TextRecognizer
rapidocr_onnxruntime/ch_ppocr_v3_rec/config.yaml
DELETED
@@ -1,12 +0,0 @@
model_path: resources/models/ch_PP-OCRv3_rec_infer.onnx

use_cuda: false
# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true

rec_img_shape: [3, 48, 320]
rec_batch_num: 6
rapidocr_onnxruntime/ch_ppocr_v3_rec/text_recognize.py
DELETED
@@ -1,120 +0,0 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import math
import time
from typing import List

import cv2
import numpy as np

try:
    from .utils import CTCLabelDecode, read_yaml, OrtInferSession
except:
    from utils import CTCLabelDecode, read_yaml, OrtInferSession


class TextRecognizer(object):
    def __init__(self, config):
        session_instance = OrtInferSession(config)
        self.session = session_instance.session
        self.input_name = session_instance.get_input_name()
        meta_dict = session_instance.get_metadata()

        if 'character' in meta_dict.keys():
            self.character_dict_path = meta_dict['character'].splitlines()
        else:
            self.character_dict_path = config.get('keys_path', None)
        self.postprocess_op = CTCLabelDecode(self.character_dict_path)

        self.rec_batch_num = config['rec_batch_num']
        self.rec_image_shape = config['rec_img_shape']

    def __call__(self, img_list: List[np.ndarray]):
        if isinstance(img_list, np.ndarray):
            img_list = [img_list]

        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]

        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        img_num = len(img_list)
        rec_res = [['', 0.0]] * img_num

        batch_num = self.rec_batch_num
        elapse = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)

            norm_img_batch = []
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                max_wh_ratio)
                norm_img_batch.append(norm_img[np.newaxis, :])
            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)

            starttime = time.time()
            onnx_inputs = {self.input_name: norm_img_batch}
            preds = self.session.run(None, onnx_inputs)[0]
            rec_result = self.postprocess_op(preds)

            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
            elapse += time.time() - starttime
        return rec_res, elapse

    def resize_norm_img(self, img, max_wh_ratio):
        img_channel, img_height, img_width = self.rec_image_shape
        assert img_channel == img.shape[2]

        img_width = int(img_height * max_wh_ratio)

        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(img_height * ratio) > img_width:
            resized_w = img_width
        else:
            resized_w = int(math.ceil(img_height * ratio))

        resized_image = cv2.resize(img, (resized_w, img_height))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5

        padding_im = np.zeros((img_channel, img_height, img_width),
                              dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', type=str, help='image_dir|image_path')
    parser.add_argument('--config_path', type=str, default='config.yaml')
    args = parser.parse_args()

    config = read_yaml(args.config_path)
    text_recognizer = TextRecognizer(config)

    img = cv2.imread(args.image_path)
    rec_res, predict_time = text_recognizer(img)
    print(f'rec result: {rec_res}\t cost: {predict_time}s')
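
resize_norm_img above is the usual CRNN-style batch preparation: every crop in a batch is scaled to height 48, widened proportionally up to int(48 * max_wh_ratio), normalized to [-1, 1], and right-padded with zeros so the whole batch shares one width. A sketch of just the width arithmetic, using two made-up crop sizes (the min(...) compresses the original if/else):

import math

rec_img_height = 48                  # from rec_img_shape [3, 48, 320]
crops = [(24, 100), (24, 30)]        # (h, w) of two text crops in one batch

max_wh_ratio = max(w / float(h) for h, w in crops)   # 100/24 ≈ 4.17
batch_width = int(rec_img_height * max_wh_ratio)     # 200

for h, w in crops:
    resized_w = min(batch_width,
                    int(math.ceil(rec_img_height * w / float(h))))
    print(f'{h}x{w} crop -> resized to 48x{resized_w}, padded to 48x{batch_width}')
# 24x100 crop -> resized to 48x200, padded to 48x200
# 24x30 crop  -> resized to 48x60,  padded to 48x200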
rapidocr_onnxruntime/ch_ppocr_v3_rec/utils.py
DELETED
@@ -1,128 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-import warnings
-
-import numpy as np
-import yaml
-from onnxruntime import (get_available_providers, get_device,
-                         SessionOptions, InferenceSession,
-                         GraphOptimizationLevel)
-
-
-class OrtInferSession(object):
-    def __init__(self, config):
-        sess_opt = SessionOptions()
-        sess_opt.log_severity_level = 4
-        sess_opt.enable_cpu_mem_arena = False
-        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
-
-        cuda_ep = 'CUDAExecutionProvider'
-        cpu_ep = 'CPUExecutionProvider'
-        cpu_provider_options = {
-            "arena_extend_strategy": "kSameAsRequested",
-        }
-
-        EP_list = []
-        if config['use_cuda'] and get_device() == 'GPU' \
-                and cuda_ep in get_available_providers():
-            EP_list = [(cuda_ep, config[cuda_ep])]
-        EP_list.append((cpu_ep, cpu_provider_options))
-
-        self.session = InferenceSession(config['model_path'],
-                                        sess_options=sess_opt,
-                                        providers=EP_list)
-
-        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
-            warnings.warn(f'{cuda_ep} is not available for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
-                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
-                          'you can check their relations from the official web site: '
-                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
-                          RuntimeWarning)
-
-    def get_input_name(self, input_idx=0):
-        return self.session.get_inputs()[input_idx].name
-
-    def get_output_name(self, output_idx=0):
-        return self.session.get_outputs()[output_idx].name
-
-    def get_metadata(self):
-        meta_dict = self.session.get_modelmeta().custom_metadata_map
-        return meta_dict
-
-
-def read_yaml(yaml_path):
-    with open(yaml_path, 'rb') as f:
-        data = yaml.load(f, Loader=yaml.Loader)
-    return data
-
-
-class CTCLabelDecode(object):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, character_dict_path):
-        super(CTCLabelDecode, self).__init__()
-
-        self.character_str = []
-        assert character_dict_path is not None, "character_dict_path should not be None"
-
-        if isinstance(character_dict_path, str):
-            with open(character_dict_path, "rb") as fin:
-                lines = fin.readlines()
-                for line in lines:
-                    line = line.decode('utf-8').strip("\n").strip("\r\n")
-                    self.character_str.append(line)
-        else:
-            self.character_str = character_dict_path
-        self.character_str.append(' ')
-
-        dict_character = self.add_special_char(self.character_str)
-        self.character = dict_character
-
-        self.dict = {}
-        for i, char in enumerate(dict_character):
-            self.dict[char] = i
-
-    def __call__(self, preds, label=None):
-        preds_idx = preds.argmax(axis=2)
-        preds_prob = preds.max(axis=2)
-        text = self.decode(preds_idx, preds_prob,
-                           is_remove_duplicate=True)
-        if label is None:
-            return text
-        label = self.decode(label)
-        return text, label
-
-    def add_special_char(self, dict_character):
-        dict_character = ['blank'] + dict_character
-        return dict_character
-
-    def get_ignored_tokens(self):
-        return [0]  # for ctc blank
-
-    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
-        """ convert text-index into text-label. """
-        result_list = []
-        ignored_tokens = self.get_ignored_tokens()
-        batch_size = len(text_index)
-        for batch_idx in range(batch_size):
-            char_list = []
-            conf_list = []
-            for idx in range(len(text_index[batch_idx])):
-                if text_index[batch_idx][idx] in ignored_tokens:
-                    continue
-                if is_remove_duplicate:
-                    # only for predict
-                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
-                            batch_idx][idx]:
-                        continue
-                char_list.append(self.character[int(text_index[batch_idx][
-                    idx])])
-                if text_prob is not None:
-                    conf_list.append(text_prob[batch_idx][idx])
-                else:
-                    conf_list.append(1)
-            text = ''.join(char_list)
-            result_list.append((text, np.mean(conf_list + [1e-50])))
-        return result_list
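CTCLabelDecode above is a plain greedy CTC decoder: take the argmax per time step, drop the blank at index 0, and collapse consecutive repeats of the same raw index. A self-contained toy run of that collapse rule; the four-symbol alphabet and probabilities are made up for illustration:

import numpy as np

characters = ['blank', 'a', 'b', 'c']  # index 0 is the CTC blank, as above

# Fake per-timestep class probabilities, shape (T, num_classes).
probs = np.array([
    [0.10, 0.80, 0.05, 0.05],  # 'a'
    [0.10, 0.80, 0.05, 0.05],  # 'a' again -> collapsed as a repeat
    [0.90, 0.05, 0.03, 0.02],  # blank -> dropped, but it breaks the run
    [0.10, 0.70, 0.10, 0.10],  # 'a' kept: the previous raw index was blank
    [0.05, 0.05, 0.10, 0.80],  # 'c'
])

idx = probs.argmax(axis=1)
chars, confs, prev = [], [], None
for t, i in enumerate(idx):
    if i != 0 and i != prev:       # skip blanks and collapsed repeats
        chars.append(characters[i])
        confs.append(probs[t, i])
    prev = i

print(''.join(chars), float(np.mean(confs)))  # -> 'aac' with mean confidence ~0.77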
rapidocr_onnxruntime/rapid_ocr_api.py
DELETED
@@ -1,176 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-import copy
-import importlib
-import sys
-from pathlib import Path
-
-import cv2
-import numpy as np
-import yaml
-
-root_dir = Path(__file__).resolve().parent
-sys.path.append(str(root_dir))
-
-
-class TextSystem(object):
-    def __init__(self, config_path):
-        super(TextSystem).__init__()
-        if not Path(config_path).exists():
-            raise FileNotFoundError(f'{config_path} does not exist!')
-
-        config = self.read_yaml(config_path)
-
-        global_config = config['Global']
-        self.print_verbose = global_config['print_verbose']
-        self.text_score = global_config['text_score']
-        self.min_height = global_config['min_height']
-        self.width_height_ratio = global_config['width_height_ratio']
-
-        TextDetector = self.init_module(config['Det']['module_name'],
-                                        config['Det']['class_name'])
-        self.text_detector = TextDetector(config['Det'])
-
-        TextRecognizer = self.init_module(config['Rec']['module_name'],
-                                          config['Rec']['class_name'])
-        self.text_recognizer = TextRecognizer(config['Rec'])
-
-        self.use_angle_cls = config['Global']['use_angle_cls']
-        if self.use_angle_cls:
-            TextClassifier = self.init_module(config['Cls']['module_name'],
-                                              config['Cls']['class_name'])
-            self.text_cls = TextClassifier(config['Cls'])
-
-    def __call__(self, img: np.ndarray, **kwargs):
-        # Override a few hyper-parameter values here if the caller passed them in.
-        if kwargs:
-            # Fetch the hyper-parameters.
-            box_thresh = kwargs.get('box_thresh', 0.5)
-            unclip_ratio = kwargs.get('unclip_ratio', 1.6)
-            text_score = kwargs.get('text_score', 0.5)
-
-            # Update the hyper-parameters.
-            self.text_detector.postprocess_op.box_thresh = box_thresh
-            self.text_detector.postprocess_op.unclip_ratio = unclip_ratio
-            self.text_score = text_score
-
-        h, w = img.shape[:2]
-        if self.width_height_ratio == -1:
-            use_limit_ratio = False
-        else:
-            use_limit_ratio = w / h > self.width_height_ratio
-
-        if h <= self.min_height or use_limit_ratio:
-            dt_boxes, img_crop_list = self.get_boxes_img_without_det(img, h, w)
-        else:
-            dt_boxes, elapse = self.text_detector(img)
-            if dt_boxes is None or len(dt_boxes) < 1:
-                return None, None
-            if self.print_verbose:
-                print(f'dt_boxes num: {len(dt_boxes)}, elapse: {elapse}')
-
-            dt_boxes = self.sorted_boxes(dt_boxes)
-            img_crop_list = self.get_crop_img_list(img, dt_boxes)
-
-        if self.use_angle_cls:
-            img_crop_list, _, elapse = self.text_cls(img_crop_list)
-            if self.print_verbose:
-                print(f'cls num: {len(img_crop_list)}, elapse: {elapse}')
-
-        rec_res, elapse = self.text_recognizer(img_crop_list)
-        if self.print_verbose:
-            print(f'rec_res num: {len(rec_res)}, elapse: {elapse}')
-
-        filter_boxes, filter_rec_res = self.filter_boxes_rec_by_score(dt_boxes,
-                                                                      rec_res)
-        return filter_boxes, filter_rec_res
-
-    @staticmethod
-    def read_yaml(yaml_path):
-        with open(yaml_path, 'rb') as f:
-            data = yaml.load(f, Loader=yaml.Loader)
-        return data
-
-    @staticmethod
-    def init_module(module_name, class_name):
-        module_part = importlib.import_module(module_name)
-        return getattr(module_part, class_name)
-
-    def get_boxes_img_without_det(self, img, h, w):
-        x0, y0, x1, y1 = 0, 0, w, h
-        dt_boxes = np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
-        dt_boxes = dt_boxes[np.newaxis, ...]
-        img_crop_list = [img]
-        return dt_boxes, img_crop_list
-
-    def get_crop_img_list(self, img, dt_boxes):
-        def get_rotate_crop_image(img, points):
-            img_crop_width = int(
-                max(
-                    np.linalg.norm(points[0] - points[1]),
-                    np.linalg.norm(points[2] - points[3])))
-            img_crop_height = int(
-                max(
-                    np.linalg.norm(points[0] - points[3]),
-                    np.linalg.norm(points[1] - points[2])))
-            pts_std = np.float32([[0, 0], [img_crop_width, 0],
-                                  [img_crop_width, img_crop_height],
-                                  [0, img_crop_height]])
-            M = cv2.getPerspectiveTransform(points, pts_std)
-            dst_img = cv2.warpPerspective(
-                img,
-                M, (img_crop_width, img_crop_height),
-                borderMode=cv2.BORDER_REPLICATE,
-                flags=cv2.INTER_CUBIC)
-            dst_img_height, dst_img_width = dst_img.shape[0:2]
-            if dst_img_height * 1.0 / dst_img_width >= 1.5:
-                dst_img = np.rot90(dst_img)
-            return dst_img
-
-        img_crop_list = []
-        for box in dt_boxes:
-            tmp_box = copy.deepcopy(box)
-            img_crop = get_rotate_crop_image(img, tmp_box)
-            img_crop_list.append(img_crop)
-        return img_crop_list
-
-    @staticmethod
-    def sorted_boxes(dt_boxes):
-        """
-        Sort text boxes in order from top to bottom, left to right
-        args:
-            dt_boxes(array): detected text boxes with shape [4, 2]
-        return:
-            sorted boxes(array) with shape [4, 2]
-        """
-        num_boxes = dt_boxes.shape[0]
-        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
-        _boxes = list(sorted_boxes)
-
-        for i in range(num_boxes - 1):
-            if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
-                    (_boxes[i + 1][0][0] < _boxes[i][0][0]):
-                tmp = _boxes[i]
-                _boxes[i] = _boxes[i + 1]
-                _boxes[i + 1] = tmp
-        return _boxes
-
-    def filter_boxes_rec_by_score(self, dt_boxes, rec_res):
-        filter_boxes, filter_rec_res = [], []
-        for box, rec_result in zip(dt_boxes, rec_res):
-            text, score = rec_result
-            if score >= self.text_score:
-                filter_boxes.append(box)
-                filter_rec_res.append(rec_result)
-        return filter_boxes, filter_rec_res
-
-
-if __name__ == '__main__':
-    text_sys = TextSystem('config.yaml')
-
-    import cv2
-    img = cv2.imread('resources/test_images/det_images/ch_en_num.jpg')
-
-    result = text_sys(img)
-    print(result)
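One detail worth noting from the deleted TextSystem: sorted_boxes establishes reading order with a (y, x) sort on each box's top-left corner, followed by a single swap pass for neighbours whose y coordinates differ by less than 10 px but sit right-to-left. A small self-contained check of that behaviour, with made-up coordinates:

import numpy as np


def sorted_boxes(dt_boxes):
    # Same logic as the deleted TextSystem.sorted_boxes above.
    num_boxes = dt_boxes.shape[0]
    _boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
    for i in range(num_boxes - 1):
        if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
                _boxes[i + 1][0][0] < _boxes[i][0][0]:
            _boxes[i], _boxes[i + 1] = _boxes[i + 1], _boxes[i]
    return _boxes


boxes = np.array([
    [[200, 12], [300, 12], [300, 40], [200, 40]],   # same line, right-hand box
    [[10, 15], [110, 15], [110, 43], [10, 43]],     # same line, left-hand box
    [[10, 80], [110, 80], [110, 108], [10, 108]],   # next line down
], dtype=np.float32)

for box in sorted_boxes(boxes):
    print(box[0])  # top-left corners: left-to-right, then top-to-bottom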
requirements.txt
CHANGED
@@ -1,10 +1,3 @@
 Gradio
-pyclipper>=1.2.0
-Shapely>=1.7.1
-opencv_python>=4.5.1.48
-six>=1.15.0
-numpy>=1.19.5
 Pillow
-
-pytest
-onnxruntime
+rapidocr_onnxruntime
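With the vendored pipeline and model files removed, the Space now depends on the published rapidocr_onnxruntime wheel, which ships its own models; the individual pins (pyclipper, Shapely, opencv_python, six, numpy, onnxruntime) could be dropped, presumably because the wheel declares them itself. A smoke test of the replacement, assuming the RapidOCR entry point the package documents on PyPI (verify against the installed version's README):

import cv2
from rapidocr_onnxruntime import RapidOCR  # assumed packaged entry point, not shown in this diff

engine = RapidOCR()
img = cv2.imread('path/to/test_image.jpg')  # placeholder path

result, elapse = engine(img)  # result: list of [box, text, score], or None if nothing was found
if result:
    for box, text, score in result:
        print(text, score)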
resources/fonts/.gitkeep
DELETED
File without changes
resources/models/.gitkeep
DELETED
File without changes
resources/models/ch_PP-OCRv3_det_infer.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3439588c030faea393a54515f51e983d8e155b19a2e8aba7891934c1cf0de526
-size 2432880
resources/models/ch_PP-OCRv3_rec_infer.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:897a3ededb38fee0dae2c1ccee38241f37df202c9509e3abca02e9217c5ee615
-size 10690752
resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e47acedf663230f8863ff1ab0e64dd2d82b838fceb5957146dab185a89d6215c
-size 585532