Really-amin committed on
Commit
8e77663
·
verified ·
1 Parent(s): bbcbdc2

Upload 8 files

Browse files
backup/.env ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ENVIRONMENT=production
2
+ MAX_WORKERS=4
3
+ WORKER_TIMEOUT=300
4
+ MODEL_CACHE_SIZE=1024
5
+ REDIS_URL=redis://redis:6379
6
+ MONGODB_URL=mongodb://ocr_user:ocr_password@mongodb:27017/ocr_db
7
+ ELASTICSEARCH_URL=http://elasticsearch:9200
8
+ LOG_LEVEL=INFO
9
+ ENABLE_METRICS=true
10
+
11
+ # Default admin credentials
12
+ ADMIN_USERNAME=admin
13
+ ADMIN_PASSWORD=change_me_in_production
14
+
15
+ # Service versions
16
+ ELASTICSEARCH_VERSION=8.8.0
17
+ KIBANA_VERSION=8.8.0
18
+ MONGODB_VERSION=latest
19
+ REDIS_VERSION=alpine
20
+
21
+ # Resource limits
22
+ API_CPU_LIMIT=2
23
+ API_MEMORY_LIMIT=4G
24
+ DB_CPU_LIMIT=1
25
+ DB_MEMORY_LIMIT=2G
backup/docker-compose.yml ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ ocr-api:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ image: persian-ocr-api
9
+ container_name: persian-ocr-service
10
+ restart: unless-stopped
11
+ ports:
12
+ - "8000:8000"
13
+ volumes:
14
+ - ./app:/app
15
+ - ./logs:/app/logs
16
+ - ./models:/app/models
17
+ - ./temp_uploads:/app/temp_uploads
18
+ environment:
19
+ - ENVIRONMENT=production
20
+ - MAX_WORKERS=4
21
+ - WORKER_TIMEOUT=300
22
+ - MODEL_CACHE_SIZE=1024
23
+ - REDIS_URL=redis://redis:6379
24
+ - MONGODB_URL=mongodb://ocr_user:ocr_password@mongodb:27017/ocr_db  # mongod runs with auth enabled (MONGO_INITDB_ROOT_* is set); use the ocr_user account created by mongo-init.js
25
+ - LOG_LEVEL=INFO
26
+ - ENABLE_METRICS=true
27
+ depends_on:
28
+ - redis
29
+ - mongodb
30
+ - elasticsearch
31
+ healthcheck:
32
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
33
+ interval: 30s
34
+ timeout: 10s
35
+ retries: 3
36
+ deploy:
37
+ resources:
38
+ limits:
39
+ cpus: '2'
40
+ memory: 4G
41
+ reservations:
42
+ cpus: '1'
43
+ memory: 2G
44
+ networks:
45
+ - ocr-network
46
+
47
+ redis:
48
+ image: redis:alpine
49
+ container_name: ocr-redis
50
+ command: redis-server --appendonly yes
51
+ restart: unless-stopped
52
+ ports:
53
+ - "6379:6379"
54
+ volumes:
55
+ - redis-data:/data
56
+ healthcheck:
57
+ test: ["CMD", "redis-cli", "ping"]
58
+ interval: 30s
59
+ timeout: 10s
60
+ retries: 3
61
+ deploy:
62
+ resources:
63
+ limits:
64
+ cpus: '0.5'
65
+ memory: 1G
66
+ networks:
67
+ - ocr-network
68
+
69
+ mongodb:
70
+ image: mongo:latest
71
+ container_name: ocr-mongodb
72
+ restart: unless-stopped
73
+ environment:
74
+ - MONGO_INITDB_ROOT_USERNAME=admin
75
+ - MONGO_INITDB_ROOT_PASSWORD=secret
76
+ - MONGO_INITDB_DATABASE=ocr_db
77
+ ports:
78
+ - "27017:27017"
79
+ volumes:
80
+ - mongodb-data:/data/db
81
+ - ./mongo-init:/docker-entrypoint-initdb.d
82
+ healthcheck:
83
+ test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
84
+ interval: 30s
85
+ timeout: 10s
86
+ retries: 3
87
+ deploy:
88
+ resources:
89
+ limits:
90
+ cpus: '1'
91
+ memory: 2G
92
+ networks:
93
+ - ocr-network
94
+
95
+ elasticsearch:
96
+ image: elasticsearch:8.8.0
97
+ container_name: ocr-elasticsearch
98
+ restart: unless-stopped
99
+ environment:
100
+ - discovery.type=single-node
101
+ - ES_JAVA_OPTS=-Xms512m -Xmx512m
102
+ - xpack.security.enabled=false
103
+ ports:
104
+ - "9200:9200"
105
+ volumes:
106
+ - elasticsearch-data:/usr/share/elasticsearch/data
107
+ healthcheck:
108
+ test: ["CMD-SHELL", "curl -s http://localhost:9200/_cluster/health | grep -q 'status.*green\\|status.*yellow'"]
109
+ interval: 30s
110
+ timeout: 10s
111
+ retries: 3
112
+ deploy:
113
+ resources:
114
+ limits:
115
+ cpus: '1'
116
+ memory: 2G
117
+ networks:
118
+ - ocr-network
119
+
120
+ kibana:
121
+ image: kibana:8.8.0
122
+ container_name: ocr-kibana
123
+ restart: unless-stopped
124
+ ports:
125
+ - "5601:5601"
126
+ environment:
127
+ - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
128
+ depends_on:
129
+ - elasticsearch
130
+ deploy:
131
+ resources:
132
+ limits:
133
+ cpus: '0.5'
134
+ memory: 1G
135
+ networks:
136
+ - ocr-network
137
+
138
+ prometheus:
139
+ image: prom/prometheus:latest
140
+ container_name: ocr-prometheus
141
+ restart: unless-stopped
142
+ ports:
143
+ - "9090:9090"
144
+ volumes:
145
+ - ./prometheus.yml:/etc/prometheus/prometheus.yml
146
+ - prometheus-data:/prometheus
147
+ command:
148
+ - '--config.file=/etc/prometheus/prometheus.yml'
149
+ - '--storage.tsdb.path=/prometheus'
150
+ - '--web.console.libraries=/usr/share/prometheus/console_libraries'
151
+ - '--web.console.templates=/usr/share/prometheus/consoles'
152
+ deploy:
153
+ resources:
154
+ limits:
155
+ cpus: '0.5'
156
+ memory: 1G
157
+ networks:
158
+ - ocr-network
159
+
160
+ grafana:
161
+ image: grafana/grafana:latest
162
+ container_name: ocr-grafana
163
+ restart: unless-stopped
164
+ ports:
165
+ - "3000:3000"
166
+ volumes:
167
+ - grafana-data:/var/lib/grafana
168
+ environment:
169
+ - GF_SECURITY_ADMIN_PASSWORD=admin
170
+ - GF_USERS_ALLOW_SIGN_UP=false
171
+ depends_on:
172
+ - prometheus
173
+ deploy:
174
+ resources:
175
+ limits:
176
+ cpus: '0.5'
177
+ memory: 1G
178
+ networks:
179
+ - ocr-network
180
+
181
+ traefik:
182
+ image: traefik:v2.10
183
+ container_name: ocr-traefik
184
+ restart: unless-stopped
185
+ ports:
186
+ - "80:80"
187
+ - "443:443"
188
+ - "8080:8080"
189
+ volumes:
190
+ - ./traefik.yml:/etc/traefik/traefik.yml
191
+ - ./acme.json:/acme.json
192
+ - /var/run/docker.sock:/var/run/docker.sock:ro
193
+ deploy:
194
+ resources:
195
+ limits:
196
+ cpus: '0.5'
197
+ memory: 512M
198
+ networks:
199
+ - ocr-network
200
+
201
+ volumes:
202
+ redis-data:
203
+ mongodb-data:
204
+ elasticsearch-data:
205
+ prometheus-data:
206
+ grafana-data:
207
+
208
+ networks:
209
+ ocr-network:
210
+ driver: bridge
backup/dockerfile ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Prevent interactive prompts during installation
4
+ ENV DEBIAN_FRONTEND=noninteractive
5
+
6
+ # Set environment variables
7
+ ENV PYTHONUNBUFFERED=1 \
8
+ PYTHONDONTWRITEBYTECODE=1 \
9
+ TZ=Asia/Tehran
10
+
11
+ # Set the timezone
12
+ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
13
+
14
+ # Install system dependencies (libgl1 replaces libgl1-mesa-glx, a transitional package that no longer exists on Debian 12+ base images)
15
+ RUN apt-get update && apt-get install -y --no-install-recommends \
16
+ build-essential \
17
+ tesseract-ocr \
18
+ tesseract-ocr-fas \
19
+ libtesseract-dev \
20
+ libgl1 \
21
+ libglib2.0-0 \
22
+ libsm6 \
23
+ libxext6 \
24
+ libxrender-dev \
25
+ libpango-1.0-0 \
26
+ libpangocairo-1.0-0 \
27
+ git \
28
+ wget \
29
+ curl \
30
+ && rm -rf /var/lib/apt/lists/*
31
+
32
+ # Create app directory
33
+ WORKDIR /app
34
+
35
+ # Copy requirements first to leverage Docker cache
36
+ COPY requirements.txt .
37
+
38
+ # Install Python dependencies
39
+ RUN pip3 install --no-cache-dir -r requirements.txt
40
+
41
+ # Copy the rest of the application
42
+ COPY . .
43
+
44
+ # Create necessary directories
45
+ RUN mkdir -p /app/temp_uploads /app/logs /app/models
46
+
47
+ # Set user permissions
48
+ RUN chown -R 1000:1000 /app
49
+ USER 1000
50
+
51
+ # Expose port
52
+ EXPOSE 8000
53
+
54
+ # Start command with gunicorn
55
+ CMD ["gunicorn", "webhook_api:create_api", "--workers", "4", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]
backup/mongo-init.js ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ db.createUser(
2
+ {
3
+ user: "ocr_user",
4
+ pwd: "ocr_password",
5
+ roles: [
6
+ {
7
+ role: "readWrite",
8
+ db: "ocr_db"
9
+ }
10
+ ]
11
+ }
12
+ );
13
+
14
+ db.createCollection("processed_documents");
15
+ db.createCollection("webhook_logs");
16
+ db.createCollection("metrics");
17
+
18
+ // Create indexes
19
+ db.processed_documents.createIndex({ "created_at": 1 });
20
+ db.processed_documents.createIndex({ "status": 1 });
21
+ db.webhook_logs.createIndex({ "timestamp": 1 });
22
+ db.metrics.createIndex({ "timestamp": 1 });
backup/prometheus-config.yml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ global:
2
+ scrape_interval: 15s
3
+ evaluation_interval: 15s
4
+
5
+ scrape_configs:
6
+ - job_name: 'ocr-api'
7
+ static_configs:
8
+ - targets: ['ocr-api:8000']
9
+ metrics_path: '/metrics'
10
+
11
+ - job_name: 'prometheus'
12
+ static_configs:
13
+ - targets: ['localhost:9090']
backup/traefik-config.js ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ api:
2
+ dashboard: true
3
+ insecure: true
4
+
5
+ providers:
6
+ docker:
7
+ endpoint: "unix:///var/run/docker.sock"
8
+ exposedByDefault: false
9
+
10
+ entryPoints:
11
+ web:
12
+ address: ":80"
13
+ http:
14
+ redirections:
15
+ entryPoint:
16
+ to: websecure
17
+ scheme: https
18
+
19
+ websecure:
20
+ address: ":443"
21
+
22
+ certificatesResolvers:
23
+ letsencrypt:
24
+ acme:
25
26
+ storage: acme.json
27
+ httpChallenge:
28
+ entryPoint: web
data/ocr_data.db ADDED
File without changes
data/word_dictionary.xlsx ADDED
Binary file (6.19 kB). View file