switch to Docker space
Browse files
- Dockerfile +13 -0
- README.md +2 -5
- app.py +1 -19
- packages.txt +0 -2
- requirements.txt +0 -9
Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10
|
| 2 |
+
|
| 3 |
+
ARG PIP_NO_CACHE_DIR=1
|
| 4 |
+
|
| 5 |
+
# Refresh the package lists first, and pass -y so the install does not stop at a confirmation prompt during the build.
RUN apt-get update && apt-get install -y tesseract-ocr
|
| 6 |
+
RUN wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
|
| 7 |
+
&& dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb
|
| 8 |
+
|
| 9 |
+
RUN pip install torch==2.1.1 -i https://download.pytorch.org/whl/cpu
|
| 10 |
+
RUN pip install paddlepaddle==2.5.1 -i https://mirror.baidu.com/pypi/simple
|
| 11 |
+
RUN pip install transformers pytesseract paddleocr gradio Pillow
|
| 12 |
+
|
| 13 |
+
# Copy the repository contents (including app.py) into the image; no earlier instruction does this.
WORKDIR /app
COPY . /app
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,12 +1,9 @@
|
|
| 1 |
---
|
| 2 |
title: Layoutlm Docvqa Paddleocr
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: yellow
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 4.8.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: Layoutlm Docvqa Paddleocr
|
| 3 |
+
emoji: 📄
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: yellow
|
| 6 |
+
sdk: docker
|
|
|
|
|
|
|
|
|
|
| 7 |
---
|
| 8 |
|
| 9 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -1,19 +1,3 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
|
| 3 |
-
if sys.platform == "linux":
|
| 4 |
-
try:
|
| 5 |
-
import paddle
|
| 6 |
-
|
| 7 |
-
except ImportError:
|
| 8 |
-
import os
|
| 9 |
-
|
| 10 |
-
# install libssl1.1 on HF spaces
|
| 11 |
-
os.system(
|
| 12 |
-
"wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb"
|
| 13 |
-
)
|
| 14 |
-
os.system("dpkg -x libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb .")
|
| 15 |
-
os.environ["LD_LIBRARY_PATH"] = os.environ.get("LD_LIBRARY_PATH", "") + ":./usr/lib/x86_64-linux-gnu"
|
| 16 |
-
|
| 17 |
import cv2
|
| 18 |
import gradio as gr
|
| 19 |
import numpy as np
|
|
@@ -45,9 +29,7 @@ def predict(image: Image.Image, question: str, ocr_engine: str):
|
|
| 45 |
boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
|
| 46 |
|
| 47 |
for box in boxes:
|
| 48 |
-
cv2.polylines(
|
| 49 |
-
image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3
|
| 50 |
-
)
|
| 51 |
|
| 52 |
x1 = boxes[:, :, 0].min(1) * 1000 / image.width
|
| 53 |
y1 = boxes[:, :, 1].min(1) * 1000 / image.height
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import cv2
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
|
|
|
| 29 |
boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
|
| 30 |
|
| 31 |
for box in boxes:
|
| 32 |
+
cv2.polylines(image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3)
|
|
|
|
|
|
|
| 33 |
|
| 34 |
x1 = boxes[:, :, 0].min(1) * 1000 / image.width
|
| 35 |
y1 = boxes[:, :, 1].min(1) * 1000 / image.height
|
packages.txt
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
sudo
|
| 2 |
-
tesseract-ocr
|
|
|
|
|
|
|
|
|
requirements.txt
DELETED
|
@@ -1,9 +0,0 @@
|
|
| 1 |
-
--index-url https://download.pytorch.org/whl/cpu
|
| 2 |
-
--extra-index-url https://mirror.baidu.com/pypi/simple
|
| 3 |
-
numpy
|
| 4 |
-
torch
|
| 5 |
-
transformers
|
| 6 |
-
paddlepaddle==2.5.1
|
| 7 |
-
paddleocr
|
| 8 |
-
opencv-python-headless
|
| 9 |
-
pytesseract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|