Spaces:
Running
Running
Commit
·
483610e
1
Parent(s):
5610ef5
feat: enhance Dockerfile and database setup for SQLite support
Browse files
- Dockerfile +14 -4
- src/crawlgpt/core/database.py +2 -1
Dockerfile
CHANGED
@@ -1,32 +1,37 @@
|
|
1 |
# This Dockerfile is used to build a Docker image for the CrawlGPT project using Streamlit as the front-end
|
2 |
# Specifically for huggingface spaces
|
3 |
|
4 |
-
#
|
5 |
FROM python:3.12-slim
|
6 |
|
7 |
# Set working directory
|
8 |
WORKDIR /app
|
9 |
|
10 |
-
# Install system dependencies including Chrome/Playwright dependencies
|
11 |
RUN apt-get update && apt-get install -y \
|
12 |
build-essential \
|
13 |
curl \
|
14 |
software-properties-common \
|
15 |
sudo \
|
16 |
git \
|
|
|
|
|
17 |
&& rm -rf /var/lib/apt/lists/*
|
18 |
|
19 |
# Create a non-root user and set permissions
|
20 |
RUN useradd -m -s /bin/bash appuser && \
|
21 |
echo "appuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
|
|
|
|
|
|
|
22 |
RUN mkdir -p /app/.crawl4ai && chown -R appuser:appuser /app/.crawl4ai
|
23 |
RUN mkdir -p /app/exports && chown -R appuser:appuser /app/exports
|
24 |
|
25 |
-
# Copy project files
|
26 |
COPY pyproject.toml setup_env.py ./
|
27 |
COPY src/ ./src/
|
28 |
COPY tests/ ./tests/
|
29 |
-
|
30 |
# NOTE: one tweak is needed in the main core module (LLMBasedCrawler.py): comment out the following line:
|
31 |
# from dotenv import load_dotenv # line 11 It is not needed in the docker container
|
32 |
# Because it's trying to load the API credentials from .env file which we don't have in the container
|
@@ -62,6 +67,11 @@ ENV PATH="/app/src:${PATH}"
|
|
62 |
# Switch to non-root user
|
63 |
USER appuser
|
64 |
|
|
|
|
|
|
|
|
|
|
|
65 |
# Allow appuser to install Python packages locally (user-level installations)
|
66 |
ENV PATH="/home/appuser/.local/bin:${PATH}"
|
67 |
RUN mkdir -p /home/appuser/.local && chown -R appuser:appuser /home/appuser
|
|
|
1 |
# This Dockerfile is used to build a Docker image for the CrawlGPT project using Streamlit as the front-end
|
2 |
# Specifically for huggingface spaces
|
3 |
|
4 |
+
# Modified Dockerfile with database support
|
5 |
FROM python:3.12-slim
|
6 |
|
7 |
# Set working directory
|
8 |
WORKDIR /app
|
9 |
|
10 |
+
# Install system dependencies including SQLite and Chrome/Playwright dependencies
|
11 |
RUN apt-get update && apt-get install -y \
|
12 |
build-essential \
|
13 |
curl \
|
14 |
software-properties-common \
|
15 |
sudo \
|
16 |
git \
|
17 |
+
libsqlite3-dev \
|
18 |
+
sqlite3 \
|
19 |
&& rm -rf /var/lib/apt/lists/*
|
20 |
|
21 |
# Create a non-root user and set permissions
|
22 |
RUN useradd -m -s /bin/bash appuser && \
|
23 |
echo "appuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
|
24 |
+
|
25 |
+
# Set ownership for database storage
|
26 |
+
RUN mkdir -p /app/data && chown -R appuser:appuser /app/data
|
27 |
RUN mkdir -p /app/.crawl4ai && chown -R appuser:appuser /app/.crawl4ai
|
28 |
RUN mkdir -p /app/exports && chown -R appuser:appuser /app/exports
|
29 |
|
30 |
+
# Copy project files and set ownership
|
31 |
COPY pyproject.toml setup_env.py ./
|
32 |
COPY src/ ./src/
|
33 |
COPY tests/ ./tests/
|
34 |
+
RUN chown -R appuser:appuser /app # Ensure appuser owns all files
|
35 |
# NOTE: one tweak is needed in the main core module (LLMBasedCrawler.py): comment out the following line:
|
36 |
# from dotenv import load_dotenv # line 11 It is not needed in the docker container
|
37 |
# Because it's trying to load the API credentials from .env file which we don't have in the container
|
|
|
67 |
# Switch to non-root user
|
68 |
USER appuser
|
69 |
|
70 |
+
# Initialize the database directory and create an empty database file with mode 644
|
71 |
+
RUN mkdir -p /app/data && \
|
72 |
+
touch ${DATABASE_PATH} && \
|
73 |
+
chmod 644 ${DATABASE_PATH}
|
74 |
+
|
75 |
# Allow appuser to install Python packages locally (user-level installations)
|
76 |
ENV PATH="/home/appuser/.local/bin:${PATH}"
|
77 |
RUN mkdir -p /home/appuser/.local && chown -R appuser:appuser /home/appuser
|
src/crawlgpt/core/database.py
CHANGED
@@ -4,6 +4,7 @@ from sqlalchemy.ext.declarative import declarative_base
|
|
4 |
from sqlalchemy.orm import sessionmaker, relationship
|
5 |
from datetime import datetime
|
6 |
from passlib.context import CryptContext
|
|
|
7 |
|
8 |
Base = declarative_base()
|
9 |
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
@@ -27,7 +28,7 @@ class ChatHistory(Base):
|
|
27 |
timestamp = Column(DateTime, default=datetime.utcnow)
|
28 |
user = relationship("User", back_populates="chats")
|
29 |
|
30 |
-
engine = create_engine('sqlite:///crawlgpt.db')
|
31 |
Base.metadata.create_all(bind=engine)
|
32 |
Session = sessionmaker(bind=engine)
|
33 |
|
|
|
4 |
from sqlalchemy.orm import sessionmaker, relationship
|
5 |
from datetime import datetime
|
6 |
from passlib.context import CryptContext
|
7 |
+
import os
|
8 |
|
9 |
Base = declarative_base()
|
10 |
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
|
|
28 |
timestamp = Column(DateTime, default=datetime.utcnow)
|
29 |
user = relationship("User", back_populates="chats")
|
30 |
|
31 |
+
engine = create_engine(os.getenv('DATABASE_URL', 'sqlite:///crawlgpt.db'))
|
32 |
Base.metadata.create_all(bind=engine)
|
33 |
Session = sessionmaker(bind=engine)
|
34 |
|