Luke
committed on
Commit
·
633cf99
1
Parent(s):
44eb855
no message
Browse files- Preprocess/preprocessImg.py +19 -1
- app.py +72 -44
Preprocess/preprocessImg.py
CHANGED
|
@@ -3,6 +3,24 @@ import numpy as np
|
|
| 3 |
from PIL import Image, ImageEnhance
|
| 4 |
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
# 方案一
|
| 7 |
def preprocess_image001(image):
|
| 8 |
# 將影像轉換為 NumPy 數組
|
|
@@ -18,7 +36,7 @@ def preprocess_image001(image):
|
|
| 18 |
denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
|
| 19 |
return Image.fromarray(denoised)
|
| 20 |
|
| 21 |
-
|
| 22 |
def preprocess_image002(image):
|
| 23 |
# 將 PIL Image 轉換為 numpy array
|
| 24 |
image_np = np.array(image)
|
|
|
|
| 3 |
from PIL import Image, ImageEnhance
|
| 4 |
|
| 5 |
|
| 6 |
+
def PreprocessImg(image):
    """Run all five preprocessing schemes on a single image.

    Args:
        image: The uploaded image. It is passed unchanged to each of
            ``preprocess_image001`` .. ``preprocess_image005``.

    Returns:
        A 5-tuple ``(pre_img_001, pre_img_002, pre_img_003, pre_img_004,
        pre_img_005)`` with the result of schemes one to five, in order.

    Raises:
        ValueError: If ``image`` is ``None`` (no image has been uploaded).
    """
    if image is None:
        raise ValueError("尚未上傳圖片!")

    # The names are resolved when this function runs, so the scheme
    # functions may be defined further down in the module.
    schemes = (
        preprocess_image001,  # scheme 1
        preprocess_image002,  # scheme 2
        preprocess_image003,  # scheme 3
        preprocess_image004,  # scheme 4
        preprocess_image005,  # scheme 5
    )
    return tuple(scheme(image) for scheme in schemes)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
# 方案一
|
| 25 |
def preprocess_image001(image):
|
| 26 |
# 將影像轉換為 NumPy 數組
|
|
|
|
| 36 |
denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
|
| 37 |
return Image.fromarray(denoised)
|
| 38 |
|
| 39 |
+
# 方案二
|
| 40 |
def preprocess_image002(image):
|
| 41 |
# 將 PIL Image 轉換為 numpy array
|
| 42 |
image_np = np.array(image)
|
app.py
CHANGED
|
@@ -2,57 +2,70 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
from Plan.AiLLM import llm_recognition
|
| 4 |
from Plan.pytesseractOCR import ocr_recognition
|
| 5 |
-
from Preprocess.preprocessImg import
|
| 6 |
-
preprocess_image001, preprocess_image002, preprocess_image003,
|
| 7 |
-
preprocess_image004, preprocess_image005
|
| 8 |
-
)
|
| 9 |
|
| 10 |
# 取得所有語言清單
|
| 11 |
languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
| 12 |
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
# 方案一
|
| 16 |
-
|
| 17 |
-
ocr_result_001 = ocr_recognition(pre_img_001, valid_type, language)
|
| 18 |
# 方案二
|
| 19 |
-
|
| 20 |
-
ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
|
| 21 |
# 方案三
|
| 22 |
-
|
| 23 |
-
ocr_result_003 = ocr_recognition(pre_img_003, valid_type, language)
|
| 24 |
# 方案四
|
| 25 |
-
|
| 26 |
-
ocr_result_004 = ocr_recognition(pre_img_004, valid_type, language)
|
| 27 |
# 方案五
|
| 28 |
-
|
| 29 |
-
|
|
|
|
| 30 |
|
| 31 |
-
return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
|
| 32 |
-
ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005)
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
def preprocess_and_llm(image, valid_type, language):
|
| 36 |
# 方案一
|
| 37 |
-
|
| 38 |
-
llm_result_001 = llm_recognition(pre_img_001, valid_type, language)
|
| 39 |
# 方案二
|
| 40 |
-
|
| 41 |
-
llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
|
| 42 |
# 方案三
|
| 43 |
-
|
| 44 |
-
llm_result_003 = llm_recognition(pre_img_003, valid_type, language)
|
| 45 |
# 方案四
|
| 46 |
-
|
| 47 |
-
llm_result_004 = llm_recognition(pre_img_004, valid_type, language)
|
| 48 |
# 方案五
|
| 49 |
-
|
| 50 |
-
llm_result_005 = llm_recognition(pre_img_005, valid_type, language)
|
| 51 |
|
| 52 |
-
return
|
| 53 |
-
llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005)
|
| 54 |
|
| 55 |
|
|
|
|
| 56 |
with gr.Blocks() as demo:
|
| 57 |
with gr.Row():
|
| 58 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
|
@@ -60,8 +73,17 @@ with gr.Blocks() as demo:
|
|
| 60 |
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
|
| 61 |
|
| 62 |
with gr.Row():
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
with gr.Row():
|
| 67 |
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
|
@@ -85,17 +107,23 @@ with gr.Blocks() as demo:
|
|
| 85 |
ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
|
| 86 |
llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
demo.launch(share=False)
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from Plan.AiLLM import llm_recognition
|
| 4 |
from Plan.pytesseractOCR import ocr_recognition
|
| 5 |
+
from Preprocess.preprocessImg import PreprocessImg
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# 取得所有語言清單
|
| 8 |
# Ask the tesseract CLI for its language list. The [1:-1] slice drops the
# first and last entries of the split output (presumably the header line and
# the empty string after the trailing newline; confirm against the actual
# `tesseract --list-langs` output).
languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
| 9 |
|
| 10 |
|
| 11 |
+
# 預處理圖片
|
| 12 |
+
def preprocess_image(image):
    """Preprocess the uploaded image with every scheme (button callback).

    Args:
        image: The value of the image input component, or ``None`` when
            nothing has been uploaded.

    Returns:
        A 7-tuple: the full collection of preprocessed images (stored in
        state for the recognition callbacks), ``True`` as the
        "preprocessing finished" flag, followed by the five preprocessed
        images individually, one per preview component.

    Raises:
        ValueError: If ``image`` is ``None``. A Gradio warning is shown
            first.
    """
    if image is None:
        gr.Warning("尚未上傳圖片!")
        raise ValueError("尚未上傳圖片!")

    preprocessed_images = PreprocessImg(image)
    # PreprocessImg returns exactly five images, so unpacking yields the
    # same elements as indexing [0]..[4] one by one.
    return (preprocessed_images, True, *preprocessed_images)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# pytesseract OCR
|
| 29 |
+
def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
    """Run pytesseract OCR on every preprocessed image (button callback).

    Args:
        valid_type: Validation type selected in the UI; forwarded to
            ``ocr_recognition``.
        language: Language selected in the UI; forwarded to
            ``ocr_recognition``.
        preprocessed_images: Preprocessed images kept in state, one per
            scheme.
        finish_pre_img: Flag kept in state; truthy once preprocessing has
            run.

    Returns:
        A tuple with one OCR result per preprocessed image, in scheme
        order. With the five schemes this is ``(ocr_result_001, ...,
        ocr_result_005)``, matching the five OCR output components.

    Raises:
        ValueError: If preprocessing has not been completed, or the image
            state is empty. A Gradio warning is shown first.
    """
    # Also reject an empty image state: it previously surfaced as an
    # unhelpful IndexError instead of the "preprocess first" message.
    if not finish_pre_img or not preprocessed_images:
        gr.Warning("請先完成圖像預處理!")
        raise ValueError("請先完成圖像預處理!")

    return tuple(
        ocr_recognition(pre_img, valid_type, language)
        for pre_img in preprocessed_images
    )
|
| 46 |
|
|
|
|
|
|
|
| 47 |
|
| 48 |
+
# AI LLM OCR
|
| 49 |
+
def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
    """Run AI LLM recognition on every preprocessed image (button callback).

    Args:
        valid_type: Validation type selected in the UI; forwarded to
            ``llm_recognition``.
        language: Language selected in the UI; forwarded to
            ``llm_recognition``.
        preprocessed_images: Preprocessed images kept in state, one per
            scheme.
        finish_pre_img: Flag kept in state; truthy once preprocessing has
            run.

    Returns:
        A tuple with one LLM result per preprocessed image, in scheme
        order. With the five schemes this is ``(llm_result_001, ...,
        llm_result_005)``, matching the five LLM output components.

    Raises:
        ValueError: If preprocessing has not been completed, or the image
            state is empty. A Gradio warning is shown first.
    """
    # Also reject an empty image state: it previously surfaced as an
    # unhelpful IndexError instead of the "preprocess first" message.
    if not finish_pre_img or not preprocessed_images:
        gr.Warning("請先完成圖像預處理!")
        raise ValueError("請先完成圖像預處理!")

    return tuple(
        llm_recognition(pre_img, valid_type, language)
        for pre_img in preprocessed_images
    )
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
+
# VIEW
|
| 69 |
with gr.Blocks() as demo:
|
| 70 |
with gr.Row():
|
| 71 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
|
|
|
| 73 |
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
|
| 74 |
|
| 75 |
with gr.Row():
|
| 76 |
+
preImg_button = gr.Button("圖片預先處理")
|
| 77 |
+
|
| 78 |
+
with gr.Row():
|
| 79 |
+
with gr.Column():
|
| 80 |
+
ocr_button = gr.Button("使用 Pytesseract OCR 辨識")
|
| 81 |
+
gr.Markdown(
|
| 82 |
+
"<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package: Google Pytesseract</div>")
|
| 83 |
+
with gr.Column():
|
| 84 |
+
llm_button = gr.Button("使用 AI LLM 模型辨識")
|
| 85 |
+
gr.Markdown(
|
| 86 |
+
"<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:Bert-base-chinese</div>")
|
| 87 |
|
| 88 |
with gr.Row():
|
| 89 |
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
|
|
|
| 107 |
ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
|
| 108 |
llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
|
| 109 |
|
| 110 |
+
# 定義狀態
|
| 111 |
+
finish_pre_img_state = gr.State(False)
|
| 112 |
+
preprocessed_images_state = gr.State([])
|
| 113 |
+
|
| 114 |
+
# 預先處理圖片 按鈕
|
| 115 |
+
preImg_button.click(preprocess_image, inputs=[image_input],
|
| 116 |
+
outputs=[preprocessed_images_state, finish_pre_img_state,
|
| 117 |
+
preprocess_output_001, preprocess_output_002,
|
| 118 |
+
preprocess_output_003, preprocess_output_004,
|
| 119 |
+
preprocess_output_005])
|
| 120 |
+
# pytesseract 按鈕
|
| 121 |
+
ocr_button.click(Basic_ocr, inputs=[validation_type, language_dropdown,
|
| 122 |
+
preprocessed_images_state, finish_pre_img_state],
|
| 123 |
+
outputs=[ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005])
|
| 124 |
+
# AI LLM 按鈕
|
| 125 |
+
llm_button.click(AiLLM_ocr, inputs=[validation_type, language_dropdown,
|
| 126 |
+
preprocessed_images_state, finish_pre_img_state],
|
| 127 |
+
outputs=[llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005])
|
| 128 |
|
| 129 |
demo.launch(share=False)
|