File size: 3,032 Bytes
cd8b432
 
 
483610e
cd8b432
 
 
 
 
483610e
cd8b432
 
 
 
 
 
483610e
 
cd8b432
 
 
 
 
483610e
 
 
cd8b432
 
 
483610e
cd8b432
 
 
483610e
cd8b432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483610e
 
 
 
 
cd8b432
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Builds the Docker image for the CrawlGPT project, with Streamlit as the front end.
# Intended specifically for deployment on Hugging Face Spaces.

# Modified Dockerfile with database support
FROM python:3.12-slim

# Relative paths in later instructions resolve against /app.
WORKDIR /app

# System packages installed with apt:
#   build-essential  - compiler toolchain (presumably for Python packages that
#                      build native extensions)
#   curl, git        - fetching tools
#   libsqlite3-dev, sqlite3 - SQLite headers and command-line client
#   sudo             - appuser is granted passwordless sudo further down
# Browser/Playwright system libraries are NOT installed here; they come from
# the `playwright install-deps` step near the end of the file.
# software-properties-common is intentionally omitted: nothing in this file
# calls add-apt-repository, and the package is not available on newer Debian
# releases (e.g. Debian 13) that the floating python:3.12-slim tag can resolve
# to, where requesting it aborts the build.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libsqlite3-dev \
        sqlite3 \
        sudo \
    && rm -rf /var/lib/apt/lists/*

# Create the unprivileged account "appuser" (with a home directory and bash as
# its login shell) and append a sudoers rule that lets it run any command
# through sudo without a password.
# NOTE(review): passwordless sudo makes this account root-equivalent; keep the
# rule only while a step executed as appuser really needs to escalate.
RUN useradd --create-home --shell /bin/bash appuser \
    && printf '%s\n' 'appuser ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

# Pre-create the directories the application writes to (/app/data,
# /app/.crawl4ai, /app/exports) and hand all of them to appuser in one layer.
RUN mkdir -p /app/data /app/.crawl4ai /app/exports \
    && chown -R appuser:appuser /app/data /app/.crawl4ai /app/exports

# Copy the project into the image, owned by appuser from the start.
# COPY --chown assigns ownership while the files are written, so no recursive
# chown is needed afterwards (a later `chown -R` would rewrite every copied
# file into an additional image layer).
COPY --chown=appuser:appuser pyproject.toml setup_env.py ./
COPY --chown=appuser:appuser src/ ./src/
COPY --chown=appuser:appuser tests/ ./tests/
# /app itself was created by WORKDIR and is still owned by root, so hand over
# just the directory entry; the sub-directories created earlier were already
# given to appuser.
RUN chown appuser:appuser /app
# NOTE: the core crawler module (LLMBasedCrawler.py) needs one manual change before this image is built.
# Comment out `from dotenv import load_dotenv` (line 11): it is not needed inside the Docker container,
# because it tries to load the API credentials from a .env file, which does not exist in the container.


# API credentials: GROQ_API_KEY and OLLAMA_API_TOKEN
# Docs: https://huggingface.co/docs/hub/en/spaces-sdks-docker#secrets-and-variables-management
#
# The credentials are deliberately NOT declared with ARG or ENV. Values passed
# as build args or stored with ENV end up in the image metadata and can be read
# back by anyone who can pull the image. The application has to receive them as
# environment variables when the container starts: per the docs above, Hugging
# Face Spaces exposes secrets as environment variables at runtime; for a local
# run pass them with `docker run -e GROQ_API_KEY=... -e OLLAMA_API_TOKEN=...`.
#
# The two steps below only verify that each secret was supplied to the build
# (required=true fails the build when one is missing). A variable exported
# inside a RUN step would not survive that step, so nothing is exported here.
# The secret file is mounted only for the duration of its RUN step.
RUN --mount=type=secret,id=GROQ_API_KEY,mode=0444,required=true \
    test -r /run/secrets/GROQ_API_KEY && \
    echo "GROQ_API_KEY is set."

RUN --mount=type=secret,id=OLLAMA_API_TOKEN,mode=0444,required=true \
    test -r /run/secrets/OLLAMA_API_TOKEN && \
    echo "OLLAMA_API_TOKEN is set."

# Install the project from /app in editable mode, then the test, formatting
# and lint tools. pip's download cache is disabled to keep the layer small.
RUN pip install --no-cache-dir --editable . \
    && pip install --no-cache-dir \
        pytest \
        pytest-mockito \
        black \
        isort \
        flake8

# Add /app to Python's module search path and /app/src to the executable
# search path.
ENV PYTHONPATH=/app \
    PATH="/app/src:${PATH}"

# Every instruction from here on, and the container itself, runs as appuser.
USER appuser

# Location of the database file. DATABASE_PATH has to be defined before the
# initialization step below: with the variable unset, `touch` receives no
# operand and the build fails. Override with --build-arg DATABASE_PATH=...
# NOTE(review): /app/data/crawlgpt.db is a placeholder default - confirm the
# path (and the variable name) the application actually reads.
ARG DATABASE_PATH=/app/data/crawlgpt.db
ENV DATABASE_PATH=${DATABASE_PATH}

# Initialize the database location: make sure the parent directory exists,
# create an empty file, and make it writable by its owner and read-only for
# everyone else. Expansions are quoted so a path with spaces stays one operand.
RUN mkdir -p /app/data "$(dirname "${DATABASE_PATH}")" && \
    touch "${DATABASE_PATH}" && \
    chmod 644 "${DATABASE_PATH}"

# Support user-level Python package installs for appuser: make sure
# /home/appuser/.local exists and the home directory tree belongs to appuser,
# then put /home/appuser/.local/bin at the front of PATH.
RUN mkdir -p /home/appuser/.local \
    && chown -R appuser:appuser /home/appuser
ENV PATH="/home/appuser/.local/bin:${PATH}"

# Download the Playwright browsers, then install the operating-system
# packages they need.
# NOTE(review): install-deps installs OS packages, so when this step runs as a
# non-root user it presumably relies on the passwordless sudo rule granted to
# appuser - confirm before tightening sudo access.
RUN playwright install \
    && playwright install-deps

# Document the port the Streamlit server listens on (matches --server.port).
EXPOSE 7860

# Default process: start the Streamlit chat UI, listening on all interfaces.
# Exec (JSON array) form, so the Python process is started without a shell.
CMD ["python", "-m", "streamlit", "run", "src/crawlgpt/ui/chat_app.py", \
     "--server.port=7860", \
     "--server.address=0.0.0.0"]