Spaces:
Running
Running
Upload 8 files
Browse files
- backup/.env +25 -0
- backup/docker-compose.yml +210 -0
- backup/dockerfile +55 -0
- backup/mongo-init.js +22 -0
- backup/prometheus-config.yml +13 -0
- backup/traefik-config.js +28 -0
- data/ocr_data.db +0 -0
- data/word_dictionary.xlsx +0 -0
backup/.env
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ENVIRONMENT=production
|
2 |
+
MAX_WORKERS=4
|
3 |
+
WORKER_TIMEOUT=300
|
4 |
+
MODEL_CACHE_SIZE=1024
|
5 |
+
REDIS_URL=redis://redis:6379
|
6 |
+
MONGODB_URL=mongodb://ocr_user:ocr_password@mongodb:27017/ocr_db
|
7 |
+
ELASTICSEARCH_URL=http://elasticsearch:9200
|
8 |
+
LOG_LEVEL=INFO
|
9 |
+
ENABLE_METRICS=true
|
10 |
+
|
11 |
+
# Default admin credentials
|
12 |
+
ADMIN_USERNAME=admin
|
13 |
+
ADMIN_PASSWORD=change_me_in_production
|
14 |
+
|
15 |
+
# Service versions
|
16 |
+
ELASTICSEARCH_VERSION=8.8.0
|
17 |
+
KIBANA_VERSION=8.8.0
|
18 |
+
MONGODB_VERSION=latest
|
19 |
+
REDIS_VERSION=alpine
|
20 |
+
|
21 |
+
# Resource limits
|
22 |
+
API_CPU_LIMIT=2
|
23 |
+
API_MEMORY_LIMIT=4G
|
24 |
+
DB_CPU_LIMIT=1
|
25 |
+
DB_MEMORY_LIMIT=2G
|
backup/docker-compose.yml
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: '3.8'
|
2 |
+
|
3 |
+
services:
|
4 |
+
ocr-api:
|
5 |
+
build:
|
6 |
+
context: .
|
7 |
+
dockerfile: Dockerfile
|
8 |
+
image: persian-ocr-api
|
9 |
+
container_name: persian-ocr-service
|
10 |
+
restart: unless-stopped
|
11 |
+
ports:
|
12 |
+
- "8000:8000"
|
13 |
+
volumes:
|
14 |
+
- ./app:/app
|
15 |
+
- ./logs:/app/logs
|
16 |
+
- ./models:/app/models
|
17 |
+
- ./temp_uploads:/app/temp_uploads
|
18 |
+
environment:
|
19 |
+
- ENVIRONMENT=production
|
20 |
+
- MAX_WORKERS=4
|
21 |
+
- WORKER_TIMEOUT=300
|
22 |
+
- MODEL_CACHE_SIZE=1024
|
23 |
+
- REDIS_URL=redis://redis:6379
|
24 |
+
- MONGODB_URL=mongodb://mongodb:27017/ocr_db
|
25 |
+
- LOG_LEVEL=INFO
|
26 |
+
- ENABLE_METRICS=true
|
27 |
+
depends_on:
|
28 |
+
- redis
|
29 |
+
- mongodb
|
30 |
+
- elasticsearch
|
31 |
+
healthcheck:
|
32 |
+
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
33 |
+
interval: 30s
|
34 |
+
timeout: 10s
|
35 |
+
retries: 3
|
36 |
+
deploy:
|
37 |
+
resources:
|
38 |
+
limits:
|
39 |
+
cpus: '2'
|
40 |
+
memory: 4G
|
41 |
+
reservations:
|
42 |
+
cpus: '1'
|
43 |
+
memory: 2G
|
44 |
+
networks:
|
45 |
+
- ocr-network
|
46 |
+
|
47 |
+
redis:
|
48 |
+
image: redis:alpine
|
49 |
+
container_name: ocr-redis
|
50 |
+
command: redis-server --appendonly yes
|
51 |
+
restart: unless-stopped
|
52 |
+
ports:
|
53 |
+
- "6379:6379"
|
54 |
+
volumes:
|
55 |
+
- redis-data:/data
|
56 |
+
healthcheck:
|
57 |
+
test: ["CMD", "redis-cli", "ping"]
|
58 |
+
interval: 30s
|
59 |
+
timeout: 10s
|
60 |
+
retries: 3
|
61 |
+
deploy:
|
62 |
+
resources:
|
63 |
+
limits:
|
64 |
+
cpus: '0.5'
|
65 |
+
memory: 1G
|
66 |
+
networks:
|
67 |
+
- ocr-network
|
68 |
+
|
69 |
+
mongodb:
|
70 |
+
image: mongo:latest
|
71 |
+
container_name: ocr-mongodb
|
72 |
+
restart: unless-stopped
|
73 |
+
environment:
|
74 |
+
- MONGO_INITDB_ROOT_USERNAME=admin
|
75 |
+
- MONGO_INITDB_ROOT_PASSWORD=secret
|
76 |
+
- MONGO_INITDB_DATABASE=ocr_db
|
77 |
+
ports:
|
78 |
+
- "27017:27017"
|
79 |
+
volumes:
|
80 |
+
- mongodb-data:/data/db
|
81 |
+
- ./mongo-init:/docker-entrypoint-initdb.d
|
82 |
+
healthcheck:
|
83 |
+
test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
|
84 |
+
interval: 30s
|
85 |
+
timeout: 10s
|
86 |
+
retries: 3
|
87 |
+
deploy:
|
88 |
+
resources:
|
89 |
+
limits:
|
90 |
+
cpus: '1'
|
91 |
+
memory: 2G
|
92 |
+
networks:
|
93 |
+
- ocr-network
|
94 |
+
|
95 |
+
elasticsearch:
|
96 |
+
image: elasticsearch:8.8.0
|
97 |
+
container_name: ocr-elasticsearch
|
98 |
+
restart: unless-stopped
|
99 |
+
environment:
|
100 |
+
- discovery.type=single-node
|
101 |
+
- ES_JAVA_OPTS=-Xms512m -Xmx512m
|
102 |
+
- xpack.security.enabled=false
|
103 |
+
ports:
|
104 |
+
- "9200:9200"
|
105 |
+
volumes:
|
106 |
+
- elasticsearch-data:/usr/share/elasticsearch/data
|
107 |
+
healthcheck:
|
108 |
+
test: ["CMD-SHELL", "curl -s http://localhost:9200/_cluster/health | grep -q 'status.*green\\|status.*yellow'"]
|
109 |
+
interval: 30s
|
110 |
+
timeout: 10s
|
111 |
+
retries: 3
|
112 |
+
deploy:
|
113 |
+
resources:
|
114 |
+
limits:
|
115 |
+
cpus: '1'
|
116 |
+
memory: 2G
|
117 |
+
networks:
|
118 |
+
- ocr-network
|
119 |
+
|
120 |
+
kibana:
|
121 |
+
image: kibana:8.8.0
|
122 |
+
container_name: ocr-kibana
|
123 |
+
restart: unless-stopped
|
124 |
+
ports:
|
125 |
+
- "5601:5601"
|
126 |
+
environment:
|
127 |
+
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
|
128 |
+
depends_on:
|
129 |
+
- elasticsearch
|
130 |
+
deploy:
|
131 |
+
resources:
|
132 |
+
limits:
|
133 |
+
cpus: '0.5'
|
134 |
+
memory: 1G
|
135 |
+
networks:
|
136 |
+
- ocr-network
|
137 |
+
|
138 |
+
prometheus:
|
139 |
+
image: prom/prometheus:latest
|
140 |
+
container_name: ocr-prometheus
|
141 |
+
restart: unless-stopped
|
142 |
+
ports:
|
143 |
+
- "9090:9090"
|
144 |
+
volumes:
|
145 |
+
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
146 |
+
- prometheus-data:/prometheus
|
147 |
+
command:
|
148 |
+
- '--config.file=/etc/prometheus/prometheus.yml'
|
149 |
+
- '--storage.tsdb.path=/prometheus'
|
150 |
+
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
|
151 |
+
- '--web.console.templates=/usr/share/prometheus/consoles'
|
152 |
+
deploy:
|
153 |
+
resources:
|
154 |
+
limits:
|
155 |
+
cpus: '0.5'
|
156 |
+
memory: 1G
|
157 |
+
networks:
|
158 |
+
- ocr-network
|
159 |
+
|
160 |
+
grafana:
|
161 |
+
image: grafana/grafana:latest
|
162 |
+
container_name: ocr-grafana
|
163 |
+
restart: unless-stopped
|
164 |
+
ports:
|
165 |
+
- "3000:3000"
|
166 |
+
volumes:
|
167 |
+
- grafana-data:/var/lib/grafana
|
168 |
+
environment:
|
169 |
+
- GF_SECURITY_ADMIN_PASSWORD=admin
|
170 |
+
- GF_USERS_ALLOW_SIGN_UP=false
|
171 |
+
depends_on:
|
172 |
+
- prometheus
|
173 |
+
deploy:
|
174 |
+
resources:
|
175 |
+
limits:
|
176 |
+
cpus: '0.5'
|
177 |
+
memory: 1G
|
178 |
+
networks:
|
179 |
+
- ocr-network
|
180 |
+
|
181 |
+
traefik:
|
182 |
+
image: traefik:v2.10
|
183 |
+
container_name: ocr-traefik
|
184 |
+
restart: unless-stopped
|
185 |
+
ports:
|
186 |
+
- "80:80"
|
187 |
+
- "443:443"
|
188 |
+
- "8080:8080"
|
189 |
+
volumes:
|
190 |
+
- ./traefik.yml:/etc/traefik/traefik.yml
|
191 |
+
- ./acme.json:/acme.json
|
192 |
+
- /var/run/docker.sock:/var/run/docker.sock:ro
|
193 |
+
deploy:
|
194 |
+
resources:
|
195 |
+
limits:
|
196 |
+
cpus: '0.5'
|
197 |
+
memory: 512M
|
198 |
+
networks:
|
199 |
+
- ocr-network
|
200 |
+
|
201 |
+
volumes:
|
202 |
+
redis-data:
|
203 |
+
mongodb-data:
|
204 |
+
elasticsearch-data:
|
205 |
+
prometheus-data:
|
206 |
+
grafana-data:
|
207 |
+
|
208 |
+
networks:
|
209 |
+
ocr-network:
|
210 |
+
driver: bridge
|
backup/dockerfile
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim
|
2 |
+
|
3 |
+
# Prevent interactive prompts during installation
|
4 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
5 |
+
|
6 |
+
# Set environment variables
|
7 |
+
ENV PYTHONUNBUFFERED=1 \
|
8 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
9 |
+
TZ=Asia/Tehran
|
10 |
+
|
11 |
+
# Set the timezone
|
12 |
+
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
13 |
+
|
14 |
+
# Install system dependencies
|
15 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
16 |
+
build-essential \
|
17 |
+
tesseract-ocr \
|
18 |
+
tesseract-ocr-fas \
|
19 |
+
libtesseract-dev \
|
20 |
+
libgl1-mesa-glx \
|
21 |
+
libglib2.0-0 \
|
22 |
+
libsm6 \
|
23 |
+
libxext6 \
|
24 |
+
libxrender-dev \
|
25 |
+
libpango-1.0-0 \
|
26 |
+
libpangocairo-1.0-0 \
|
27 |
+
git \
|
28 |
+
wget \
|
29 |
+
curl \
|
30 |
+
&& rm -rf /var/lib/apt/lists/*
|
31 |
+
|
32 |
+
# Create app directory
|
33 |
+
WORKDIR /app
|
34 |
+
|
35 |
+
# Copy requirements first to leverage Docker cache
|
36 |
+
COPY requirements.txt .
|
37 |
+
|
38 |
+
# Install Python dependencies
|
39 |
+
RUN pip3 install --no-cache-dir -r requirements.txt
|
40 |
+
|
41 |
+
# Copy the rest of the application
|
42 |
+
COPY . .
|
43 |
+
|
44 |
+
# Create necessary directories
|
45 |
+
RUN mkdir -p /app/temp_uploads /app/logs /app/models
|
46 |
+
|
47 |
+
# Set user permissions
|
48 |
+
RUN chown -R 1000:1000 /app
|
49 |
+
USER 1000
|
50 |
+
|
51 |
+
# Expose port
|
52 |
+
EXPOSE 8000
|
53 |
+
|
54 |
+
# Start command with gunicorn
|
55 |
+
CMD ["gunicorn", "webhook_api:create_api", "--workers", "4", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]
|
backup/mongo-init.js
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// MongoDB initialisation script for the OCR service.
//
// Creates the application user, the collections the service writes to, and
// an ascending index on the fields listed below. It relies on the `db`
// global supplied by the Mongo shell that executes it (presumably the
// official image's /docker-entrypoint-initdb.d mechanism — confirm against
// the compose volume mount).
//
// Everything is pinned to "ocr_db" explicitly instead of using whatever
// database the shell happens to start on: a client connecting with a
// ".../ocr_db" URI authenticates against that database by default, so the
// user has to be created there.
const ocrDb = db.getSiblingDB("ocr_db");

// Application account, limited to read/write on ocr_db.
// NOTE(review): the password is hard-coded and must match the one in the
// service's MongoDB connection URL; replace both before a real deployment.
ocrDb.createUser({
  user: "ocr_user",
  pwd: "ocr_password",
  roles: [
    {
      role: "readWrite",
      db: "ocr_db"
    }
  ]
});

// Create the collections up front so the indexes below have a target.
ocrDb.createCollection("processed_documents");
ocrDb.createCollection("webhook_logs");
ocrDb.createCollection("metrics");

// Ascending single-field indexes.
ocrDb.processed_documents.createIndex({ "created_at": 1 });
ocrDb.processed_documents.createIndex({ "status": 1 });
ocrDb.webhook_logs.createIndex({ "timestamp": 1 });
ocrDb.metrics.createIndex({ "timestamp": 1 });
|
backup/prometheus-config.yml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
global:
|
2 |
+
scrape_interval: 15s
|
3 |
+
evaluation_interval: 15s
|
4 |
+
|
5 |
+
scrape_configs:
|
6 |
+
- job_name: 'ocr-api'
|
7 |
+
static_configs:
|
8 |
+
- targets: ['ocr-api:8000']
|
9 |
+
metrics_path: '/metrics'
|
10 |
+
|
11 |
+
- job_name: 'prometheus'
|
12 |
+
static_configs:
|
13 |
+
- targets: ['localhost:9090']
|
backup/traefik-config.js
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
api:
|
2 |
+
dashboard: true
|
3 |
+
insecure: true
|
4 |
+
|
5 |
+
providers:
|
6 |
+
docker:
|
7 |
+
endpoint: "unix:///var/run/docker.sock"
|
8 |
+
exposedByDefault: false
|
9 |
+
|
10 |
+
entryPoints:
|
11 |
+
web:
|
12 |
+
address: ":80"
|
13 |
+
http:
|
14 |
+
redirections:
|
15 |
+
entryPoint:
|
16 |
+
to: websecure
|
17 |
+
scheme: https
|
18 |
+
|
19 |
+
websecure:
|
20 |
+
address: ":443"
|
21 |
+
|
22 |
+
certificatesResolvers:
|
23 |
+
letsencrypt:
|
24 |
+
acme:
|
25 |
+
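email: admin@example.com  # placeholder: the real address was masked by the page's email obfuscation; set a valid contact before requesting certificates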
|
26 |
+
storage: acme.json
|
27 |
+
httpChallenge:
|
28 |
+
entryPoint: web
|
data/ocr_data.db
ADDED
File without changes
|
data/word_dictionary.xlsx
ADDED
Binary file (6.19 kB). View file
|
|