zhichyu committed on
Commit
9abdac5
·
1 Parent(s): f79c4c9

Added jdk to happify tika (#3165)

Browse files

### What problem does this PR solve?

Added a JDK to satisfy tika (https://pypi.org/project/tika/), which needs a Java runtime. The image
size becomes ~400MB larger. Closes #2886

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (2) hide show
  1. Dockerfile +9 -6
  2. Dockerfile.slim +12 -8
Dockerfile CHANGED
@@ -15,13 +15,15 @@ RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked
15
 
16
  # If you download Python modules too slow, you can use a pip mirror site to speed up apt and poetry
17
  RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
18
- ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
19
 
20
  RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
21
- apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-pip python3-poetry \
22
- && pip3 install --user --break-system-packages poetry-plugin-pypi-mirror --index-url https://pypi.tuna.tsinghua.edu.cn/simple/ \
23
  && rm -rf /var/lib/apt/lists/*
24
 
 
 
 
 
25
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
26
  # aspose-slides on linux/arm64 is unavailable
27
  RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
@@ -30,12 +32,13 @@ RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/li
30
  fi
31
 
32
  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
33
-
34
  # Configure Poetry
35
  ENV POETRY_NO_INTERACTION=1
36
  ENV POETRY_VIRTUALENVS_IN_PROJECT=true
37
  ENV POETRY_VIRTUALENVS_CREATE=true
38
  ENV POETRY_REQUESTS_TIMEOUT=15
 
39
 
40
  # builder stage
41
  FROM base AS builder
@@ -57,9 +60,9 @@ COPY pyproject.toml poetry.toml poetry.lock ./
57
 
58
  RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
59
  if [ "$LIGHTEN" -eq 0 ]; then \
60
- poetry install --sync --no-root --with=full; \
61
  else \
62
- poetry install --sync --no-root; \
63
  fi
64
 
65
  # production stage
 
15
 
16
  # If you download Python modules too slow, you can use a pip mirror site to speed up apt and poetry
17
  RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
 
18
 
19
  RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
20
+ apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
 
21
  && rm -rf /var/lib/apt/lists/*
22
 
23
+ RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
24
+ && pipx install poetry \
25
+ && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
26
+
27
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
28
  # aspose-slides on linux/arm64 is unavailable
29
  RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
 
32
  fi
33
 
34
  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
35
+ ENV PATH=/root/.local/bin:$PATH
36
  # Configure Poetry
37
  ENV POETRY_NO_INTERACTION=1
38
  ENV POETRY_VIRTUALENVS_IN_PROJECT=true
39
  ENV POETRY_VIRTUALENVS_CREATE=true
40
  ENV POETRY_REQUESTS_TIMEOUT=15
41
+ ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
42
 
43
  # builder stage
44
  FROM base AS builder
 
60
 
61
  RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
62
  if [ "$LIGHTEN" -eq 0 ]; then \
63
+ poetry install --no-root --with=full; \
64
  else \
65
+ poetry install --no-root; \
66
  fi
67
 
68
  # production stage
Dockerfile.slim CHANGED
@@ -15,26 +15,30 @@ RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked
15
 
16
  # If you download Python modules too slow, you can use a pip mirror site to speed up apt and poetry
17
  RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
18
- ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
19
 
20
  RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
21
- apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-pip python3-poetry \
22
- && pip3 install --user --break-system-packages poetry-plugin-pypi-mirror --index-url https://pypi.tuna.tsinghua.edu.cn/simple/ \
23
  && rm -rf /var/lib/apt/lists/*
24
 
 
 
 
 
25
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
26
  # aspose-slides on linux/arm64 is unavailable
27
- RUN if [ "${ARCH}" = "amd64" ]; then \
28
- curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb; \
 
29
  fi
30
 
31
  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
32
-
33
  # Configure Poetry
34
  ENV POETRY_NO_INTERACTION=1
35
  ENV POETRY_VIRTUALENVS_IN_PROJECT=true
36
  ENV POETRY_VIRTUALENVS_CREATE=true
37
  ENV POETRY_REQUESTS_TIMEOUT=15
 
38
 
39
  # builder stage
40
  FROM base AS builder
@@ -56,9 +60,9 @@ COPY pyproject.toml poetry.toml poetry.lock ./
56
 
57
  RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
58
  if [ "$LIGHTEN" -eq 0 ]; then \
59
- poetry install --sync --no-root --with=full; \
60
  else \
61
- poetry install --sync --no-root; \
62
  fi
63
 
64
  # production stage
 
15
 
16
  # If you download Python modules too slow, you can use a pip mirror site to speed up apt and poetry
17
  RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
 
18
 
19
  RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
20
+ apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
 
21
  && rm -rf /var/lib/apt/lists/*
22
 
23
+ RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
24
+ && pipx install poetry \
25
+ && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
26
+
27
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
28
  # aspose-slides on linux/arm64 is unavailable
29
+ RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
30
+ if [ "${ARCH}" = "amd64" ]; then \
31
+ dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
32
  fi
33
 
34
  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
35
+ ENV PATH=/root/.local/bin:$PATH
36
  # Configure Poetry
37
  ENV POETRY_NO_INTERACTION=1
38
  ENV POETRY_VIRTUALENVS_IN_PROJECT=true
39
  ENV POETRY_VIRTUALENVS_CREATE=true
40
  ENV POETRY_REQUESTS_TIMEOUT=15
41
+ ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
42
 
43
  # builder stage
44
  FROM base AS builder
 
60
 
61
  RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
62
  if [ "$LIGHTEN" -eq 0 ]; then \
63
+ poetry install --no-root --with=full; \
64
  else \
65
+ poetry install --no-root; \
66
  fi
67
 
68
  # production stage