jatinmehra committed on
Commit
483610e
·
1 Parent(s): 5610ef5

feat: enhance Dockerfile and database setup for SQLite support

Browse files
Files changed (2) hide show
  1. Dockerfile +14 -4
  2. src/crawlgpt/core/database.py +2 -1
Dockerfile CHANGED
@@ -1,32 +1,37 @@
1
  # This Dockerfile is used to build a Docker image for the CrawlGPT project using Streamlit as the front-end
2
  # Specifically for huggingface spaces
3
 
4
- # Use Python 3.12 as base image
5
  FROM python:3.12-slim
6
 
7
  # Set working directory
8
  WORKDIR /app
9
 
10
- # Install system dependencies including Chrome/Playwright dependencies
11
  RUN apt-get update && apt-get install -y \
12
  build-essential \
13
  curl \
14
  software-properties-common \
15
  sudo \
16
  git \
 
 
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # Create a non-root user and set permissions
20
  RUN useradd -m -s /bin/bash appuser && \
21
  echo "appuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
 
 
 
22
  RUN mkdir -p /app/.crawl4ai && chown -R appuser:appuser /app/.crawl4ai
23
  RUN mkdir -p /app/exports && chown -R appuser:appuser /app/exports
24
 
25
- # Copy project files
26
  COPY pyproject.toml setup_env.py ./
27
  COPY src/ ./src/
28
  COPY tests/ ./tests/
29
-
30
  # Gotta tweak some things in our main core code (LLMBasedCrowler.py) Comment out the following line:
31
  # from dotenv import load_dotenv # line 11 It is not needed in the docker container
32
  # Because it's trying to load the API credentials from .env file which we don't have in the container
@@ -62,6 +67,11 @@ ENV PATH="/app/src:${PATH}"
62
  # Switch to non-root user
63
  USER appuser
64
 
 
 
 
 
 
65
  # Allow appuser to install Python packages locally (user-level installations)
66
  ENV PATH="/home/appuser/.local/bin:${PATH}"
67
  RUN mkdir -p /home/appuser/.local && chown -R appuser:appuser /home/appuser
 
1
  # This Dockerfile is used to build a Docker image for the CrawlGPT project using Streamlit as the front-end
2
  # Specifically for huggingface spaces
3
 
4
+ # Modified Dockerfile with database support
5
  FROM python:3.12-slim
6
 
7
  # Set working directory
8
  WORKDIR /app
9
 
10
+ # Install system dependencies including SQLite and Chrome/Playwright dependencies
11
  RUN apt-get update && apt-get install -y \
12
  build-essential \
13
  curl \
14
  software-properties-common \
15
  sudo \
16
  git \
17
+ libsqlite3-dev \
18
+ sqlite3 \
19
  && rm -rf /var/lib/apt/lists/*
20
 
21
  # Create a non-root user and set permissions
22
  RUN useradd -m -s /bin/bash appuser && \
23
  echo "appuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
24
+
25
+ # Set ownership for database storage
26
+ RUN mkdir -p /app/data && chown -R appuser:appuser /app/data
27
  RUN mkdir -p /app/.crawl4ai && chown -R appuser:appuser /app/.crawl4ai
28
  RUN mkdir -p /app/exports && chown -R appuser:appuser /app/exports
29
 
30
+ # Copy project files and set ownership
31
  COPY pyproject.toml setup_env.py ./
32
  COPY src/ ./src/
33
  COPY tests/ ./tests/
34
+ RUN chown -R appuser:appuser /app # Ensure appuser owns all files
35
  # Gotta tweak some things in our main core code (LLMBasedCrowler.py) Comment out the following line:
36
  # from dotenv import load_dotenv # line 11 It is not needed in the docker container
37
  # Because it's trying to load the API credentials from .env file which we don't have in the container
 
67
  # Switch to non-root user
68
  USER appuser
69
 
70
+ # Initialize database directory
71
+ RUN mkdir -p /app/data && \
72
+ touch ${DATABASE_PATH} && \
73
+ chmod 644 ${DATABASE_PATH}
74
+
75
  # Allow appuser to install Python packages locally (user-level installations)
76
  ENV PATH="/home/appuser/.local/bin:${PATH}"
77
  RUN mkdir -p /home/appuser/.local && chown -R appuser:appuser /home/appuser
src/crawlgpt/core/database.py CHANGED
@@ -4,6 +4,7 @@ from sqlalchemy.ext.declarative import declarative_base
4
  from sqlalchemy.orm import sessionmaker, relationship
5
  from datetime import datetime
6
  from passlib.context import CryptContext
 
7
 
8
  Base = declarative_base()
9
  pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
@@ -27,7 +28,7 @@ class ChatHistory(Base):
27
  timestamp = Column(DateTime, default=datetime.utcnow)
28
  user = relationship("User", back_populates="chats")
29
 
30
- engine = create_engine('sqlite:///crawlgpt.db')
31
  Base.metadata.create_all(bind=engine)
32
  Session = sessionmaker(bind=engine)
33
 
 
4
  from sqlalchemy.orm import sessionmaker, relationship
5
  from datetime import datetime
6
  from passlib.context import CryptContext
7
+ import os
8
 
9
  Base = declarative_base()
10
  pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
 
28
  timestamp = Column(DateTime, default=datetime.utcnow)
29
  user = relationship("User", back_populates="chats")
30
 
31
+ engine = create_engine(os.getenv('DATABASE_URL', 'sqlite:///crawlgpt.db'))
32
  Base.metadata.create_all(bind=engine)
33
  Session = sessionmaker(bind=engine)
34