Upload 2 files
Browse files- Dockerfile +51 -0
- README.md +18 -10
Dockerfile
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ---- Build stage: compile Stirling-PDF from source ---------------------------
# NOTE(review): the official `openjdk` image is deprecated upstream; consider a
# maintained JDK 17 base once the apt package set below is verified against it.
FROM openjdk:17-jdk-slim AS builder

# Optional git branch or tag of Stirling-PDF to build. Empty (the default)
# clones the repository's default branch, exactly as before; pass
# `--build-arg STIRLING_PDF_REF=<tag>` to get a reproducible build.
ARG STIRLING_PDF_REF=""

WORKDIR /app

# The project is built through its own Gradle wrapper (./gradlew), so the
# distro `gradle` and `maven` packages are not installed. ca-certificates is
# listed explicitly because --no-install-recommends would otherwise leave the
# HTTPS clone without a trust store. The apt lists are removed in the same
# layer so they never end up in the layer cache.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Shallow clone: only the tip commit is needed to build. `--branch` is added
# only when STIRLING_PDF_REF is non-empty.
RUN git clone --depth 1 ${STIRLING_PDF_REF:+--branch "${STIRLING_PDF_REF}"} \
        https://github.com/Stirling-Tools/Stirling-PDF.git .

# --no-daemon: a long-lived Gradle daemon is pointless in a one-shot image build.
RUN ./gradlew build --no-daemon
|
14 |
+
|
15 |
+
# ---- Runtime stage: Stirling-PDF plus the external tools it shells out to ----
# NOTE(review): the official `openjdk` image is deprecated upstream; consider a
# maintained Java 17 base once the package set below is verified against it.
FROM openjdk:17-jdk-slim

WORKDIR /app

# System and Python tooling. This layer is deliberately placed BEFORE the jar
# is copied so that rebuilding the application does not invalidate it.
# NOTE(review): --no-install-recommends is intentionally not used here; it may
# drop fonts/components LibreOffice conversions rely on - verify before adding.
# NOTE(review): the pip packages are unpinned; pin them once known-good
# versions have been confirmed.
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y \
        ca-certificates \
        libreoffice \
        poppler-utils \
        python3 \
        python3-pip \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && pip3 install --no-cache-dir unoconv WeasyPrint pdf2image pillow

# Extra Tesseract language models, downloaded into /usr/share/tessdata.
# `|| exit 1` makes the build fail on the first download error; without it only
# the exit status of the last loop iteration would be checked.
RUN mkdir -p /usr/share/tessdata \
    && for tess_lang in chi_sim chi_tra eng jpn kor rus fra deu spa ita por vie tha; do \
        wget -q -P /usr/share/tessdata \
            "https://github.com/tesseract-ocr/tessdata/raw/main/${tess_lang}.traineddata" \
        || exit 1; \
    done

# Run as an unprivileged user that owns the writable directories, instead of
# running as root with world-writable (777) paths.
# NOTE(review): UID 1000 is chosen to match the user Hugging Face Spaces is
# documented to run Docker containers as - confirm for other targets.
RUN useradd --create-home --uid 1000 --shell /usr/sbin/nologin appuser \
    && mkdir -p /app/logs /app/configs /app/customFiles \
    && chown -R appuser:appuser /app /usr/share/tessdata

# Application jar produced by the builder stage.
COPY --from=builder --chown=appuser:appuser /app/build/libs/*.jar /app/stirling-pdf.jar

USER appuser
ENV HOME=/home/appuser

# Port the application listens on (see -Dserver.port below).
EXPOSE 7860

CMD ["java", "-Dserver.port=7860", "-Dserver.address=0.0.0.0", "-Dfile.encoding=UTF-8", "-jar", "/app/stirling-pdf.jar"]
|
README.md
CHANGED
@@ -1,10 +1,18 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Stirling-PDF on Hugging Face
|
2 |
+
|
3 |
+
This is a deployment of Stirling-PDF, a powerful, locally hosted, web-based PDF manipulation tool.
|
4 |
+
|
5 |
+
## Features
|
6 |
+
|
7 |
+
- PDF Manipulation (Merge, Split, Rotate, etc.)
|
8 |
+
- OCR Support (Multiple Languages)
|
9 |
+
- Image to PDF Conversion
|
10 |
+
- And many more...
|
11 |
+
|
12 |
+
## Usage
|
13 |
+
|
14 |
+
Wait for the Space to load (the first launch may take a few minutes), then use the web interface to process your PDFs.
|
15 |
+
|
16 |
+
## Credits
|
17 |
+
|
18 |
+
Original project: [Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF)
|