Shahid committed on
Commit
82e5025
·
0 Parent(s):

Initial clean history with LFS + unified app

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +17 -0
  2. .gitattributes +9 -0
  3. .gitignore +208 -0
  4. Dockerfile +49 -0
  5. README.md +159 -0
  6. backend/app/__init__.py +2 -0
  7. backend/app/api/__init__.py +2 -0
  8. backend/app/api/routers/__init__.py +2 -0
  9. backend/app/api/routers/ocr.py +134 -0
  10. backend/app/api/routers/synthetic.py +122 -0
  11. backend/app/data/annotations/annotations.csv +1 -0
  12. backend/app/main.py +53 -0
  13. backend/app/services/annotations.py +75 -0
  14. backend/app/services/ocr_processor.py +102 -0
  15. backend/app/services/synthetic/__init__.py +22 -0
  16. backend/app/services/synthetic/backgrounds.py +129 -0
  17. backend/app/services/synthetic/config.py +61 -0
  18. backend/app/services/synthetic/core.py +230 -0
  19. backend/app/services/synthetic/effects.py +218 -0
  20. backend/app/services/synthetic/huggingface_processor.py +228 -0
  21. backend/app/services/synthetic/text_renderer.py +112 -0
  22. backend/app/services/synthetic/transformations.py +249 -0
  23. backend/requirements.txt +17 -0
  24. content/static/NotoSansOriya-Black (2).ttf +3 -0
  25. content/static/NotoSansOriya-Black.ttf +3 -0
  26. content/static/NotoSansOriya-Bold (2).ttf +3 -0
  27. content/static/NotoSansOriya-Bold.ttf +3 -0
  28. content/static/NotoSansOriya-ExtraBold (2).ttf +3 -0
  29. content/static/NotoSansOriya-ExtraBold.ttf +3 -0
  30. content/static/NotoSansOriya-ExtraLight (2).ttf +3 -0
  31. content/static/NotoSansOriya-ExtraLight.ttf +3 -0
  32. content/static/NotoSansOriya-Light (2).ttf +3 -0
  33. content/static/NotoSansOriya-Light.ttf +3 -0
  34. content/static/NotoSansOriya-Medium (2).ttf +3 -0
  35. content/static/NotoSansOriya-Medium.ttf +3 -0
  36. content/static/NotoSansOriya-Regular.ttf +3 -0
  37. content/static/NotoSansOriya-SemiBold.ttf +3 -0
  38. content/static/NotoSansOriya-Thin.ttf +3 -0
  39. content/static/NotoSansOriya_Condensed-Black.ttf +3 -0
  40. content/static/NotoSansOriya_Condensed-Bold.ttf +3 -0
  41. content/static/NotoSansOriya_Condensed-ExtraBold.ttf +3 -0
  42. content/static/NotoSansOriya_Condensed-ExtraLight.ttf +3 -0
  43. content/static/NotoSansOriya_Condensed-Light.ttf +3 -0
  44. content/static/NotoSansOriya_Condensed-Medium.ttf +3 -0
  45. content/static/NotoSansOriya_Condensed-Regular.ttf +3 -0
  46. content/static/NotoSansOriya_Condensed-SemiBold.ttf +3 -0
  47. content/static/NotoSansOriya_Condensed-Thin.ttf +3 -0
  48. content/static/NotoSansOriya_ExtraCondensed-Black.ttf +3 -0
  49. content/static/NotoSansOriya_ExtraCondensed-Bold.ttf +3 -0
  50. content/static/NotoSansOriya_ExtraCondensed-ExtraBold.ttf +3 -0
.dockerignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ **/__pycache__
3
+ **/*.pyc
4
+ node_modules
5
+ frontend/node_modules
6
+ frontend/.vite
7
+ frontend/dist
8
+ .DS_Store
9
+ .vscode
10
+ .idea
11
+ **/.pytest_cache
12
+ **/.mypy_cache
13
+ **/.ruff_cache
14
+ **/.cache
15
+
16
+ # Data should come from volume in Spaces
17
+ backend/data
.gitattributes ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ *.ttf filter=lfs diff=lfs merge=lfs -text
2
+ *.png filter=lfs diff=lfs merge=lfs -text
3
+ *.jpg filter=lfs diff=lfs merge=lfs -text
4
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
5
+ *.svg filter=lfs diff=lfs merge=lfs -text
6
+ .ttf filter=lfs diff=lfs merge=lfs -text
7
+ .png filter=lfs diff=lfs merge=lfs -text
8
+ .jpg filter=lfs diff=lfs merge=lfs -text
9
+ .jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+ backend/app/data/*
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multi-stage build for Hugging Face Spaces (Docker) with single URL serving
2
+
3
+ # --- Frontend build stage ---
4
+ FROM node:20-alpine AS frontend-build
5
+ WORKDIR /app
6
+ COPY frontend/package*.json ./
7
+ RUN npm ci
8
+ COPY frontend ./
9
+ RUN npm run build
10
+
11
+ # --- Backend stage ---
12
+ FROM python:3.11-slim AS runtime
13
+
14
+ # System deps for OpenCV and general libs
15
+ RUN apt-get update && apt-get install -y --no-install-recommends \
16
+ libgl1 \
17
+ libglib2.0-0 \
18
+ libsm6 \
19
+ libxext6 \
20
+ libxrender1 \
21
+ build-essential \
22
+ python3-dev \
23
+ git \
24
+ curl \
25
+ && rm -rf /var/lib/apt/lists/*
26
+
27
+ WORKDIR /app
28
+
29
+ # Install backend deps
30
+ COPY backend/requirements.txt /app/backend/requirements.txt
31
+ RUN pip install --upgrade pip && \
32
+ pip install --no-cache-dir -r /app/backend/requirements.txt
33
+
34
+ # Copy application code
35
+ COPY backend /app/backend
36
+ COPY content /app/content
37
+
38
+ # Copy built frontend
39
+ COPY --from=frontend-build /app/dist /app/frontend_dist
40
+
41
+ # Environment
42
+ ENV PORT=7860 \
43
+ DATA_DIR=/data \
44
+ FRONTEND_DIST=/app/frontend_dist
45
+
46
+ EXPOSE 7860
47
+
48
+ # Run FastAPI (serve API + static frontend)
49
+ CMD ["python", "-m", "uvicorn", "app.main:app", "--app-dir", "backend", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Odia OCR Annotation + Synthetic Text Generator
2
+
3
+ A unified repository that provides:
4
+ - An OCR annotation tool (React frontend + FastAPI backend) to upload images, run OCR via Gemini, edit validated text, and export CSVs.
5
+ - A synthetic text generator (exposed via backend API) to render Odia/Sanskrit-like text with realistic paper/effects, including HuggingFace dataset processing.
6
+
7
+ ## Repository Structure
8
+
9
+ - `backend/`
10
+ - `app/main.py`: FastAPI app with two routers: `/api/ocr` and `/api/synthetic`
11
+ - `app/api/routers/ocr.py`: OCR endpoints (upload, OCR, annotations import/export)
12
+ - `app/api/routers/synthetic.py`: Synthetic generation endpoints
13
+ - `app/services/`: Shared services
14
+ - `ocr_processor.py`: Gemini OCR
15
+ - `annotations.py`: CSV/JSON I/O
16
+ - `synthetic/`: generator modules (config, core, effects, backgrounds, text_renderer, transformations, huggingface_processor)
17
+ - `data/`: runtime storage
18
+ - `uploaded_images/`: uploaded images (served at `/images`)
19
+ - `annotations/`: `annotations.csv` and JSON
20
+ - `synth_outputs/`: generated images and CSVs (served at `/static/synthetic`)
21
+ - `requirements.txt`: backend dependencies
22
+ - `frontend/`
23
+ - Vite + React + Tailwind app
24
+ - Routes: `/ocr` (annotation UI) and `/synthetic` (generator UI)
25
+ - `content/static/`: NotoSans Oriya fonts used by generator
26
+
27
+ ## Run Locally
28
+
29
+ 1) Backend
30
+ - `pip install -r backend/requirements.txt`
31
+ - From `backend/`: `uvicorn app.main:app --reload`
32
+ - Static mounts:
33
+ - `/images` → `backend/data/uploaded_images`
34
+ - `/static/synthetic` → `backend/data/synth_outputs`
35
+
36
+ 2) Frontend
37
+ - `cd frontend && npm install && npm run dev`
38
+ - Open `http://localhost:5173`
39
+ - Use navigation to switch between OCR and Synthetic pages
40
+
41
+ ## OCR API (FastAPI)
42
+
43
+ - `POST /api/ocr/upload`:
44
+ - Multipart files field: `files`
45
+ - Stores images in `backend/data/uploaded_images`
46
+ - `POST /api/ocr/process`:
47
+ - JSON: `{ "api_key": "<GEMINI_KEY>", "image_filenames": ["img1.png", ...] }`
48
+ - Returns: `{ "img1.png": "extracted text", ... }`
49
+ - `GET /api/ocr/annotations`:
50
+ - Returns current annotations, valid/missing images
51
+ - `POST /api/ocr/save`:
52
+ - JSON: `{ "<filename>": { "extracted_text": "...", "validated_text": "..." } }`
53
+ - Saves to CSV and JSON in `backend/data/annotations`
54
+ - `POST /api/ocr/import`:
55
+ - Multipart: `file` (CSV), `image_folder` (e.g., `uploaded_images`)
56
+ - Validates and returns annotations + image presence
57
+ - `POST /api/ocr/export`:
58
+ - JSON: `{ annotations: {...}, validated_texts: {...} }`
59
+ - Returns a downloadable CSV
60
+
61
+ Note: Legacy endpoints (`/upload/`, `/process-ocr/`, etc.) have been removed after the frontend migration. Use `/api/ocr/...` going forward.
62
+
63
+ ## Synthetic API (FastAPI)
64
+
65
+ - `POST /api/synthetic/generate`
66
+ - Modes: `single`, `comprehensive`, `ultra-realistic`, `huggingface`
67
+ - Request body examples:
68
+ - Non-HF:
69
+ `{ "mode": "single", "text": "some Odia text", "output_subdir": "demo_run_01" }`
70
+ - HF CSV:
71
+ `{ "mode": "huggingface", "dataset_url": "https://.../data.csv", "text_column": "text", "max_samples": 100, "output_subdir": "hf_demo" }`
72
+ - Response:
73
+ - Non-HF: `{ "status": "ok", "output_dir": "/static/synthetic/<job_id>" }`
74
+ - HF: `{ "status": "ok", "output_dir": "/static/synthetic/<job_id>", "csv": "/static/synthetic/<job_id>/dataset.csv", "images_dir": "/static/synthetic/<job_id>/images" }`
75
+ - Outputs are stored under `backend/data/synth_outputs/<job_id>/` and publicly served at `/static/synthetic/<job_id>/...`.
76
+
77
+ ## Fonts
78
+
79
+ - Generator uses fonts from `content/static/`.
80
+ - Default: `NotoSansOriya_Condensed-Regular.ttf` (configurable). Ensure the directory exists.
81
+
82
+ ## Effects & Styles
83
+
84
+ - Paper styles: lined paper, old paper, birch, parchment
85
+ - Effects: rotation, brightness/contrast/noise/blur, fold/crease, ink bleed, perspective, shadows, morphological ops, scanner artifacts, lens distortion, washboard/cylinder warps
86
+
87
+ ## Notes
88
+
89
+ - The backend expects the Gemini API key to be provided per-request to `/api/ocr/process`. Do not hardcode keys server-side.
90
+ - For HuggingFace datasets, the backend uses `datasets` when possible, or downloads raw CSV URLs.
91
+ - You can browse generated outputs via the links returned by `/api/synthetic/generate`.
92
+
93
+ ## Deploy to Hugging Face Spaces (Docker)
94
+
95
+ This repo includes a multi-stage Dockerfile to deploy both backend and the built frontend as a single Space.
96
+
97
+ Steps:
98
+ - Create a new Space → Type: Docker
99
+ - Push this repository to the Space
100
+ - In Space Settings:
101
+ - Enable Persistent Storage
102
+ - (Optional) Add Secrets/Env Vars as needed, e.g., `DATA_DIR=/data` (default already) and `FRONTEND_DIST=/app/frontend_dist`
103
+ - The container exposes port `7860` by default.
104
+
105
+ What the image does:
106
+ - Builds the frontend (`frontend/`) and copies the `dist/` to `/app/frontend_dist`
107
+ - Installs backend dependencies and runs `uvicorn app.main:app` from `backend/`
108
+ - Serves:
109
+ - API at `/api/...`
110
+ - Uploaded images at `/images`
111
+ - Synthetic outputs at `/static/synthetic`
112
+ - Frontend SPA at `/` (served from `/app/frontend_dist`)
113
+
114
+
115
+ 1. **Paper Textures**: Realistic paper fiber patterns using Perlin noise
116
+ 2. **Aging Effects**: Edge darkening and aging patterns
117
+ 3. **Physical Damage**: Fold lines, creases, and ink bleeding
118
+ 4. **Scanner Artifacts**: Dust, compression artifacts, scanning lines
119
+ 5. **Geometric Distortions**: Perspective changes, cylindrical warping
120
+ 6. **Lighting Effects**: Shadows and lens distortions
121
+
122
+ ## Font Requirements
123
+
124
+ The generator requires appropriate fonts for text rendering. Default configuration expects:
125
+ - Font directory: `/content/static/`
126
+ - Font file: `NotoSansOriya_ExtraCondensed-Regular.ttf`
127
+
128
+ You can specify custom fonts using `--font-dir` and `--font` parameters.
129
+
130
+ ## Performance Tips
131
+
132
+ - Use `--max-samples` to limit processing for large datasets
133
+ - Disable advanced effects with `--no-advanced-effects` for faster generation
134
+ - Use multiprocessing with `--use-multiprocessing` for batch jobs
135
+ - Adjust image dimensions to balance quality and speed
136
+
137
+ ## Error Handling
138
+
139
+ The package includes comprehensive error handling:
140
+ - Graceful fallbacks for missing dependencies
141
+ - Detailed logging for debugging
142
+ - Validation of input parameters
143
+ - Safe handling of malformed datasets
144
+
145
+ ## Contributing
146
+
147
+ The modular structure makes it easy to extend:
148
+ - Add new effects in `effects.py`
149
+ - Implement new background styles in `backgrounds.py`
150
+ - Create custom transformations in `transformations.py`
151
+ - Extend dataset processing in `huggingface_processor.py`
152
+
153
+ ## License
154
+
155
+ [Add your license information here]
156
+
157
+ ---
158
+
159
+ **Note**: This is a complete rewrite of the original monolithic code into a modular, extensible package with added HuggingFace dataset processing capabilities.
backend/app/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Makes 'app' a package
2
+
backend/app/api/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # api package
2
+
backend/app/api/routers/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # routers package
2
+
backend/app/api/routers/ocr.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from typing import List, Dict
4
+ import os
5
+ import shutil
6
+
7
+ from ...services.annotations import (
8
+ load_annotations_from_csv,
9
+ save_annotations_to_csv,
10
+ save_annotations,
11
+ )
12
+ from ...services.ocr_processor import batch_run_ocr
13
+
14
+
15
router = APIRouter(prefix="/api/ocr", tags=["ocr"])


# File extensions accepted by the upload endpoint (lower-cased comparison).
SUPPORTED_IMAGE_TYPES = {"jpg", "jpeg", "png", "bmp", "webp", "tiff"}


# BASE_DIR resolves to backend/app: three dirname() hops up from
# backend/app/api/routers/ocr.py (routers -> api -> app).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
# DATA_DIR can be overridden by environment (e.g. DATA_DIR=/data in Docker/Spaces).
DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
UPLOAD_DIR = os.path.join(DATA_DIR, "uploaded_images")
ANNOTATIONS_DIR = os.path.join(DATA_DIR, "annotations")
ANNOTATION_CSV_PATH = os.path.join(ANNOTATIONS_DIR, "annotations.csv")

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(ANNOTATIONS_DIR, exist_ok=True)

# Ensure CSV exists so GET /annotations works before any save.
# utf-8-sig writes a BOM so the file opens with correct encoding in Excel.
if not os.path.exists(ANNOTATION_CSV_PATH):
    with open(ANNOTATION_CSV_PATH, 'w', encoding='utf-8-sig') as f:
        f.write('image_filename,extracted_text,validated_text\n')
35
+
36
+
37
@router.post("/upload")
async def upload_images(files: List[UploadFile] = File(...)):
    """Store uploaded images under UPLOAD_DIR.

    Files with unsupported extensions are silently skipped (the response
    lists only the filenames that were stored). Returns
    ``{"status": "success", "images": [<stored basenames>]}``.
    """
    image_names: List[str] = []
    for file in files:
        # UploadFile.filename may be None for raw multipart parts; the
        # original code crashed with AttributeError (HTTP 500) on those.
        if not file.filename:
            continue
        ext = file.filename.rsplit('.', 1)[-1].lower()
        if ext not in SUPPORTED_IMAGE_TYPES:
            continue

        # basename() strips any client-supplied directory components,
        # preventing path traversal via crafted filenames.
        safe_name = os.path.basename(file.filename)
        path = os.path.join(UPLOAD_DIR, safe_name)
        with open(path, "wb") as f:
            f.write(await file.read())
        image_names.append(safe_name)
    return {"status": "success", "images": image_names}
51
+
52
+
53
@router.post("/process")
def process_ocr(request: Dict[str, object]):
    """Run Gemini OCR over previously uploaded images.

    Body: ``{"api_key": "<GEMINI_KEY>", "image_filenames": ["img1.png", ...]}``
    Returns a mapping of filename -> extracted text.

    Raises HTTP 400 when the key is missing or the filename list is
    absent/empty/not a list.
    """
    # `or ""` guards against a JSON null: str(None) is the truthy string
    # "None", which previously slipped past the emptiness check below.
    api_key = str(request.get("api_key", "") or "")
    raw_filenames = request.get("image_filenames", [])
    # Reject non-list payloads explicitly: list("abc") would silently
    # explode a string into single characters.
    if not isinstance(raw_filenames, list):
        raise HTTPException(status_code=400, detail="image_filenames must be a list")
    image_filenames = [str(name) for name in raw_filenames]
    if not api_key:
        raise HTTPException(status_code=400, detail="api_key is required")
    if not image_filenames:
        raise HTTPException(status_code=400, detail="image_filenames is required")
    results = batch_run_ocr(image_filenames, UPLOAD_DIR, api_key)
    return results
63
+
64
+
65
@router.get("/annotations")
def get_annotations():
    """Return the saved annotation set plus which referenced images exist on disk."""
    try:
        annotations, valid_images, missing_images = load_annotations_from_csv(
            ANNOTATION_CSV_PATH, UPLOAD_DIR
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {
        "annotations": annotations,
        "valid_images": valid_images,
        "missing_images": missing_images,
    }
76
+
77
+
78
@router.post("/save")
def save_annotated(data: dict):
    """Persist the posted annotation mapping to CSV and JSON on disk."""
    try:
        save_annotations(ANNOTATION_CSV_PATH, data)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"status": "saved"}
85
+
86
+
87
@router.post("/import")
async def import_csv(file: UploadFile = File(...), image_folder: str = Form("uploaded_images")):
    """Import an annotations CSV and report which referenced images are present.

    ``image_folder`` may be absolute, or relative to DATA_DIR.
    The uploaded CSV is parsed from a temp file and removed afterwards.
    """
    temp_dir = os.path.join(DATA_DIR, "temp")
    os.makedirs(temp_dir, exist_ok=True)
    # filename may be None for raw multipart parts; fall back to a fixed name.
    temp_path = os.path.join(temp_dir, os.path.basename(file.filename or "import.csv"))

    with open(temp_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    try:
        # If relative folder, resolve within DATA_DIR
        folder = image_folder
        if not os.path.isabs(folder):
            folder = os.path.join(DATA_DIR, folder)
        annotations, valid_images, missing_images = load_annotations_from_csv(temp_path, folder)
        return {
            "annotations": annotations,
            "valid_images": valid_images,
            "missing_images": missing_images
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # The temp CSV is only needed for parsing; previously it was never
        # deleted and accumulated under DATA_DIR/temp.
        try:
            os.remove(temp_path)
        except OSError:
            pass
109
+
110
+
111
@router.post("/export")
async def export_csv(request: Dict[str, dict]):
    """Merge extracted and validated texts, persist them to CSV, and return the file.

    Body: ``{ annotations: {...}, validated_texts: {...} }``.
    """
    try:
        annotations = request.get("annotations", {})
        validated_texts = request.get("validated_texts", {})

        # Pair each image's extracted text with its validation (empty if none).
        combined_data: Dict[str, Dict[str, str]] = {
            image_name: {
                "extracted_text": extracted,
                "validated_text": validated_texts.get(image_name, ""),
            }
            for image_name, extracted in annotations.items()
        }

        save_annotations_to_csv(ANNOTATION_CSV_PATH, combined_data)
        return FileResponse(
            ANNOTATION_CSV_PATH,
            media_type='text/csv',
            filename='annotations.csv'
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
132
+
133
+
134
+ # Legacy routes removed after frontend migration to /api/ocr
backend/app/api/routers/synthetic.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from typing import Dict, Optional
3
+ import os
4
+ import uuid
5
+
6
+ from ...services.synthetic.config import ENHANCED_DEFAULT_PARAMS
7
+ from ...services.synthetic.core import (
8
+ generate_enhanced_sanskrit_samples,
9
+ generate_comprehensive_dataset,
10
+ generate_ultra_realistic_samples,
11
+ )
12
+ from ...services.synthetic.huggingface_processor import HuggingFaceDatasetProcessor
13
+
14
+
15
+ router = APIRouter(prefix="/api/synthetic", tags=["synthetic"])
16
+
17
+
18
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19
+ DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
20
+ DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
21
+ SYN_OUT_DIR = os.path.join(DATA_DIR, "synth_outputs")
22
+ FONTS_DIR = os.path.abspath(os.path.join(BASE_DIR, os.pardir, os.pardir, "content", "static"))
23
+
24
+ os.makedirs(SYN_OUT_DIR, exist_ok=True)
25
+
26
+
27
def normalized_params(incoming: Optional[Dict]) -> Dict:
    """Merge caller-supplied params onto the defaults and pin the font directory.

    Hyphenated keys (e.g. "max-samples") are normalized to underscores so the
    generator modules see a single key style.
    """
    merged = dict(ENHANCED_DEFAULT_PARAMS)
    if incoming:
        for key, value in incoming.items():
            merged[key.replace('-', '_')] = value
    # Always resolve fonts from the repo's bundled font directory, regardless
    # of what the client sent.
    merged['font_dir'] = FONTS_DIR
    return merged
36
+
37
+
38
@router.post("/generate")
def generate(request: Dict[str, object]):
    """
    Universal generation endpoint.
    Body:
      {
        mode: 'single' | 'comprehensive' | 'ultra-realistic' | 'huggingface',
        text?: string (for non-HF modes),
        output_subdir?: string,
        params?: object,
        text_column?: string (HF),
        max_samples?: int (HF),
        dataset_url?: string (HF) OR csv_file?: string (server-side path)
      }
    Returns paths relative to /static/synthetic when applicable.
    Raises HTTP 400 for unknown modes or missing HF inputs, HTTP 500 on
    generation failures.
    """
    mode = str(request.get("mode", "single"))
    # Fall back to a default Odia sample sentence when no text is supplied.
    text = str(request.get("text", "")).strip() or "କବି ସମ୍ରାଟ ଉପେନ୍ଦ୍ର ଭଞ୍ଜ ..."
    output_subdir = str(request.get("output_subdir", ""))
    params = normalized_params(request.get("params"))

    # Resolve output dir under synth_outputs; a random UUID job id is used
    # when the caller does not name the run.
    job_id = output_subdir or str(uuid.uuid4())
    out_dir = os.path.join(SYN_OUT_DIR, job_id)
    os.makedirs(out_dir, exist_ok=True)

    try:
        if mode == 'single':
            generate_enhanced_sanskrit_samples(
                text=text,
                font_path=os.path.join(params['font_dir'], params['font']),
                output_dir=out_dir,
                params=params,
            )
            return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}"}

        elif mode == 'comprehensive':
            generate_comprehensive_dataset(
                text=text,
                output_dir=out_dir,
                params=params,
            )
            return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}"}

        elif mode == 'ultra-realistic':
            generate_ultra_realistic_samples(
                text=text,
                output_dir=out_dir,
                style_focus=request.get("style_focus"),
                params=params,
            )
            return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}"}

        elif mode == 'huggingface':
            text_column = str(request.get("text_column", "text"))
            max_samples = request.get("max_samples")
            dataset_url = request.get("dataset_url")
            csv_file = request.get("csv_file")

            processor = HuggingFaceDatasetProcessor(output_dir=out_dir, params=params)

            # A server-side csv_file takes precedence over a remote dataset_url.
            if csv_file:
                ok = processor.process_local_csv(csv_path=csv_file, text_column=text_column, max_samples=max_samples)
            elif dataset_url:
                ok = processor.process_huggingface_dataset(dataset_identifier=dataset_url, text_column=text_column, max_samples=max_samples)
            else:
                raise HTTPException(status_code=400, detail="Provide dataset_url or csv_file for huggingface mode")

            if not ok:
                raise HTTPException(status_code=500, detail="HuggingFace processing failed")

            return {
                "status": "ok",
                "output_dir": f"/static/synthetic/{job_id}",
                "csv": f"/static/synthetic/{job_id}/dataset.csv",
                "images_dir": f"/static/synthetic/{job_id}/images"
            }

        else:
            raise HTTPException(status_code=400, detail=f"Unknown mode: {mode}")

    # Re-raise our own HTTP errors untouched; wrap anything else as a 500.
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
backend/app/data/annotations/annotations.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ image_filename,extracted_text,validated_text
backend/app/main.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os

from .api.routers.ocr import router as ocr_router
from .api.routers.synthetic import router as synthetic_router


app = FastAPI(title="Unified Backend: OCR + Synthetic")

# CORS (dev-friendly; tighten for prod)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Storage paths (can be overridden by env, e.g., DATA_DIR=/data in Docker).
# BASE_DIR is backend/app — the same base the routers derive — so the static
# mounts below serve the exact directories the routers write into.
# (Previously BASE_DIR went one dirname too high, to backend/, so with
# DATA_DIR unset, /images and /static/synthetic pointed at different folders
# than the upload/generate endpoints used.)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
UPLOAD_DIR = os.path.join(DATA_DIR, "uploaded_images")
SYN_OUT_DIR = os.path.join(DATA_DIR, "synth_outputs")

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(SYN_OUT_DIR, exist_ok=True)

# Routers
app.include_router(ocr_router)
app.include_router(synthetic_router)

# Static mounts for data
app.mount("/images", StaticFiles(directory=UPLOAD_DIR), name="images")
app.mount("/static/synthetic", StaticFiles(directory=SYN_OUT_DIR), name="synthetic")

# Serve compiled frontend (if provided via env FRONTEND_DIST)
FRONTEND_DIST = os.getenv("FRONTEND_DIST")
if FRONTEND_DIST and os.path.isdir(FRONTEND_DIST):
    app.mount("/", StaticFiles(directory=FRONTEND_DIST, html=True), name="frontend")


@app.get("/")
def root():
    """Health/info endpoint listing the API route prefixes.

    NOTE(review): when FRONTEND_DIST is mounted above, that "/" mount is
    registered first and takes precedence, so this JSON response is only
    reachable in API-only deployments — confirm that is intended.
    """
    return {"message": "Unified backend is running", "routes": ["/api/ocr", "/api/synthetic"]}


# Legacy compatibility (optional):
# If you want to keep old OCR paths working without frontend changes,
# you can import and map handlers or create thin wrappers here.
# For now, keep frontend updates in a later step.
backend/app/services/annotations.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+ from typing import Tuple, Dict, List
6
+
7
+
8
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars to native Python numbers.

    The stock encoder raises TypeError on NumPy scalar types, which can
    appear in annotation payloads built from pandas/NumPy data.
    """

    def default(self, obj):
        # np.floating / np.integer are the abstract bases for every scalar
        # width (float16/32/64, int8..int64, platform ints) — broader than
        # the original float32/float64 + int32/int64 checks.
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        return super().default(obj)
15
+
16
+
17
def load_annotations(path: str) -> Dict:
    """Read annotations from a JSON file; a missing file yields an empty dict."""
    try:
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except FileNotFoundError:
        return {}
22
+
23
+
24
def save_annotations(path: str, data: Dict):
    """Persist annotations twice: as JSON next to *path* and as CSV at *path*."""
    # The JSON copy lives beside the CSV with the extension swapped.
    json_path = path.replace('.csv', '.json')
    with open(json_path, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2, cls=CustomJSONEncoder)

    # Mirror the same data into the CSV used by the OCR endpoints.
    save_annotations_to_csv(path, data)
33
+
34
+
35
def load_annotations_from_csv(csv_file: str, image_folder: str) -> Tuple[Dict, List[str], List[str]]:
    """Load annotations from CSV and split referenced images into present/missing.

    Returns ``(annotations, valid_images, missing_images)`` where annotations
    maps filename -> {'extracted_text', 'validated_text'} for images that
    exist inside *image_folder*. A missing CSV yields three empty containers.

    Raises ValueError if the CSV lacks an 'image_filename' column.
    """
    if not os.path.exists(csv_file):
        return {}, [], []

    df = pd.read_csv(csv_file, encoding='utf-8-sig')

    if 'image_filename' not in df.columns:
        raise ValueError("CSV must contain 'image_filename' column.")

    annotations: Dict[str, Dict[str, str]] = {}
    valid_images: List[str] = []
    missing_images: List[str] = []

    for _, row in df.iterrows():
        raw_name = row['image_filename']
        # Blank cells come back as float NaN; skip such rows rather than
        # crashing os.path.join with a non-string.
        if pd.isna(raw_name):
            continue
        filename = str(raw_name)
        image_path = os.path.join(image_folder, filename)
        if not os.path.exists(image_path):
            missing_images.append(filename)
            continue

        # pd.isna guards stop empty cells (NaN) from being stringified as
        # the literal text "nan", which the original code stored.
        extracted = row.get('extracted_text', '')
        extracted = '' if pd.isna(extracted) else str(extracted)
        validated = row.get('validated_text')
        # Missing/empty validation falls back to the extracted text, matching
        # the original intent for an absent 'validated_text' column.
        validated = extracted if validated is None or pd.isna(validated) else str(validated)

        annotations[filename] = {
            'extracted_text': extracted,
            'validated_text': validated,
        }
        valid_images.append(filename)

    return annotations, valid_images, missing_images
61
+
62
+
63
def save_annotations_to_csv(csv_file: str, annotations: Dict[str, Dict[str, str]]):
    """Write annotations to *csv_file* with the canonical three-column layout.

    Creates the parent directory when needed. utf-8-sig adds a BOM so the
    Odia text displays correctly when the CSV is opened in Excel.
    """
    data = [
        {
            'image_filename': filename,
            'extracted_text': str(ann.get('extracted_text', '')),
            'validated_text': str(ann.get('validated_text', ''))
        }
        for filename, ann in annotations.items()
    ]
    # dirname('') is '' for a bare filename and os.makedirs('') raises, so
    # only create a directory when there actually is one.
    parent = os.path.dirname(csv_file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Explicit columns keep the header present even for an empty mapping,
    # matching the header the router bootstraps at startup.
    df = pd.DataFrame(data, columns=['image_filename', 'extracted_text', 'validated_text'])
    df.to_csv(csv_file, index=False, encoding='utf-8-sig')
75
+
backend/app/services/ocr_processor.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified backend OCR processor using Google Gemini
2
+
3
+ import os
4
+ import base64
5
+ import logging
6
+ import time
7
+ from typing import List, Dict, Optional
8
+
9
+ import google.generativeai as genai
10
+
11
# Configure process-wide logging at import time.
# NOTE(review): calling logging.basicConfig in a library module affects the
# whole process; consider moving this to the application entry point.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)


# Lowercase file extensions accepted by encode_image_to_base64 / get_mime_type.
SUPPORTED_IMAGE_TYPES = {"jpg", "jpeg", "png", "bmp", "webp", "tiff"}
20
+
21
+
22
def encode_image_to_base64(image_path: str) -> Optional[str]:
    """Return the file at *image_path* as a base64-encoded ASCII string.

    Returns None (after logging the reason) when the file is missing or
    cannot be read.
    """
    if not os.path.exists(image_path):
        logger.error(f"Image not found: {image_path}")
        return None
    try:
        with open(image_path, "rb") as fh:
            payload = base64.b64encode(fh.read()).decode("utf-8")
    except Exception as exc:
        logger.error(f"Failed to read or encode image {image_path}: {exc}")
        return None
    return payload
32
+
33
+
34
def get_mime_type(image_path: str) -> Optional[str]:
    """Map a filename's extension to its image MIME type, or None if unsupported.

    The extension is whatever follows the last '.' (case-insensitive); 'jpg'
    is normalised to the canonical 'jpeg' subtype.
    """
    suffix = image_path.rsplit(".", 1)[-1].lower()
    if suffix not in SUPPORTED_IMAGE_TYPES:
        logger.warning(f"Unsupported image format: {suffix}")
        return None
    subtype = "jpeg" if suffix == "jpg" else suffix
    return f"image/{subtype}"
40
+
41
+
42
def run_gemini_ocr(image_path: str, api_key: str, max_retries: int = 3) -> str:
    """Extract Odia text from a single image via Gemini 1.5 Flash.

    Args:
        image_path: Path to the image file to transcribe.
        api_key: Google Generative AI API key.
        max_retries: Number of attempts before giving up.

    Returns:
        The extracted text, or a bracketed placeholder string describing the
        failure. Always returns a str (never None).
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")

    base64_image = encode_image_to_base64(image_path)
    mime_type = get_mime_type(image_path)

    if base64_image is None or mime_type is None:
        return "[Image could not be processed]"

    prompt = (
        "Extract all visible Odia (ଓଡ଼ିଆ) text from the image accurately.\n"
        "Only output the Odia text content. Do not explain or translate anything.\n"
        "If no Odia text is found, return '[No Odia text found]'."
    )

    for attempt in range(max_retries):
        try:
            response = model.generate_content(
                [
                    prompt,
                    {
                        "mime_type": mime_type,
                        "data": base64_image
                    }
                ],
                generation_config={
                    # Low temperature: we want faithful transcription, not creativity.
                    "temperature": 0.2,
                    "max_output_tokens": 2048,
                    "top_p": 0.8,
                    "top_k": 40
                }
            )

            # response.text may be empty/None for blocked or empty completions.
            text = response.text.strip() if response.text else "[No text extracted]"
            logger.info(f"OCR complete for {os.path.basename(image_path)}")
            return text

        except Exception as e:
            logger.error(f"OCR attempt {attempt + 1} failed for {image_path}: {e}")
            if attempt == max_retries - 1:
                return f"[OCR failed after {max_retries} attempts: {str(e)}]"
            # Brief fixed backoff before the next attempt.
            time.sleep(1)

    # Reached only when max_retries < 1; honour the declared `-> str` return
    # type instead of silently falling through and returning None.
    return "[OCR skipped: max_retries must be >= 1]"
85
+
86
+
87
def batch_run_ocr(image_filenames: List[str], image_folder: str, api_key: str) -> Dict[str, str]:
    """Run Gemini OCR over every listed image and collect per-file results.

    Missing files are recorded with a placeholder string instead of aborting
    the whole batch.
    """
    results: Dict[str, str] = {}
    logger.info(f"Starting batch OCR on {len(image_filenames)} images.")

    for name in image_filenames:
        path = os.path.join(image_folder, name)
        if os.path.exists(path):
            results[name] = run_gemini_ocr(path, api_key)
        else:
            logger.error(f"Image not found: {path}")
            results[name] = "[Image file not found]"

    logger.info("Batch OCR complete.")
    return results
102
+
backend/app/services/synthetic/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthetic Text Generator Package (moved under unified backend services)
3
+ """
4
+
5
+ __version__ = "1.0.0"
6
+
7
+ from .config import ENHANCED_DEFAULT_PARAMS
8
+ from .core import (
9
+ generate_enhanced_sanskrit_samples,
10
+ generate_comprehensive_dataset,
11
+ generate_ultra_realistic_samples,
12
+ )
13
+ from .huggingface_processor import HuggingFaceDatasetProcessor
14
+
15
+ __all__ = [
16
+ "ENHANCED_DEFAULT_PARAMS",
17
+ "generate_enhanced_sanskrit_samples",
18
+ "generate_comprehensive_dataset",
19
+ "generate_ultra_realistic_samples",
20
+ "HuggingFaceDatasetProcessor",
21
+ ]
22
+
backend/app/services/synthetic/backgrounds.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Background generation module for creating realistic paper textures and backgrounds
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import numpy as np
8
+ from PIL import Image
9
+ from typing import Dict
10
+ import logging
11
+ from .effects import AdvancedImageEffects
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def create_enhanced_background(width: int, height: int, style: str, params: Dict) -> Image.Image:
    """Create a width x height RGB background in one of four paper styles.

    If params['image_dir'] points at a folder containing images, a random one
    is resized and returned instead of a procedural texture. Otherwise a base
    colour for `style` ("lined_paper", "old_paper", "birch"; anything else
    falls through to parchment) is degraded with fibre texture, noise, stains
    and per-style artefacts whose strength comes from `params`.

    NOTE(review): several spots subtract a uint8 noise array directly from a
    uint8 background (`background - noise`); NumPy wraps around *before*
    np.clip runs, which can turn dark pixels bright. Also
    np.random.randint(0, 0) raises ValueError when a noise param is 0 --
    confirm both are intended.
    """
    # Prefer a real background image when a directory of them is supplied.
    if params.get("image_dir") and os.path.exists(params["image_dir"]):
        image_files = [f for f in os.listdir(params["image_dir"]) if f.lower().endswith((".png", ".jpg", ".jpeg"))]
        if image_files:
            img_path = os.path.join(params["image_dir"], random.choice(image_files))
            try:
                bg_img = Image.open(img_path).convert('RGB')
                bg_img = bg_img.resize((width, height), Image.LANCZOS)
                return bg_img
            except Exception as e:
                # Fall through to procedural generation on load failure.
                logger.error(f"Error loading background image {img_path}: {e}")

    # Paper-fibre texture, subtracted from the base colour in every style.
    if params.get('fiber_density', 0) > 0:
        fiber_texture = AdvancedImageEffects.simulate_paper_fiber_texture(width, height, params['fiber_density'])
    else:
        fiber_texture = np.zeros((height, width, 3), dtype=np.uint8)

    if style == "lined_paper":
        # Tan base with horizontal ruled lines, speckle noise and round stains.
        background = np.ones((height, width, 3), dtype=np.uint8) * [210, 180, 140]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        line_spacing = random.randint(15, 25)
        for y in range(0, height, line_spacing):
            line_width = random.randint(1, 2)
            darkness = random.randint(6, 20) * params["texture"]
            if y + line_width < height:
                background[y:y+line_width, :, :] = np.clip(background[y:y+line_width, :, :] - darkness, 0, 255)
        noise = np.random.randint(0, int(15 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)
        stain_count = int(random.randint(2, 4) * params["stains"])
        for _ in range(stain_count):
            x = random.randint(0, width-100)
            y = random.randint(0, height-100)
            size = random.randint(20, 60)
            darkness = random.randint(8, 25) * params["stain_intensity"]
            # Radial falloff mask with random per-pixel irregularity.
            stain_mask = np.zeros((size, size), dtype=np.float32)
            center = size // 2
            for i in range(size):
                for j in range(size):
                    dist = np.sqrt((i - center)**2 + (j - center)**2)
                    if dist < center:
                        stain_mask[i, j] = (1 - dist / center) * np.random.uniform(0.4, 1.0)
            # Clamp the stain window to the canvas.
            end_y = min(y + size, height)
            end_x = min(x + size, width)
            actual_size_y = end_y - y
            actual_size_x = end_x - x
            if actual_size_y > 0 and actual_size_x > 0:
                stain_region = stain_mask[:actual_size_y, :actual_size_x]
                for c in range(3):
                    # NOTE(review): stain_intensity is applied twice (already
                    # folded into `darkness` above) -- confirm intended.
                    background[y:end_y, x:end_x, c] = np.clip(
                        background[y:end_y, x:end_x, c] - darkness * stain_region * params["stain_intensity"], 0, 255
                    )

    elif style == "old_paper":
        # Aged yellowish base with darkened, de-blued page edges.
        background = np.ones((height, width, 3), dtype=np.uint8) * [236, 222, 181]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        noise = np.random.randint(0, int(12 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)
        edge_width = width // 10
        for i in range(edge_width):
            # Aging strength decays towards the interior of the page.
            factor = (edge_width - i) / edge_width * 15 * params["aging"]
            aging_noise = np.random.uniform(0.5, 1.5, (height, width))
            # Only the blue channel (index 2) is reduced -> yellow tint on all four edges.
            if i < height:
                background[i, :, 2] = np.clip(background[i, :, 2] - factor * aging_noise[i, :], 0, 255)
            if height - i - 1 >= 0:
                background[height-i-1, :, 2] = np.clip(background[height-i-1, :, 2] - factor * aging_noise[height-i-1, :], 0, 255)
            if i < width:
                background[:, i, 2] = np.clip(background[:, i, 2] - factor * aging_noise[:, i], 0, 255)
            if width - i - 1 >= 0:
                background[:, width-i-1, 2] = np.clip(background[:, width-i-1, 2] - factor * aging_noise[:, width-i-1], 0, 255)

    elif style == "birch":
        # Pale base with scattered soft blotches of brightness variation.
        background = np.ones((height, width, 3), dtype=np.uint8) * [235, 225, 215]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        noise = np.random.randint(0, int(10 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)
        variation_count = int(150 * params["texture"])
        for _ in range(variation_count):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            size = random.randint(10, 25)
            variation = random.randint(-6, 6) * params["texture"]
            for i in range(-size, size):
                for j in range(-size, size):
                    dist = np.sqrt(i*i + j*j)
                    if dist <= size:
                        # Random per-pixel shape factor gives irregular blob edges.
                        shape_factor = np.random.uniform(0.7, 1.3)
                        if dist <= size * shape_factor:
                            yi, xi = y + i, x + j
                            if 0 <= yi < height and 0 <= xi < width:
                                background[yi, xi, :] = np.clip(background[yi, xi, :] + variation, 0, 255)

    else:  # parchment (default for any unrecognised style string)
        # Warm base with fine grain-modulated speckle.
        background = np.ones((height, width, 3), dtype=np.uint8) * [230, 215, 185]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        variation_count = int(400 * params["texture"])
        for _ in range(variation_count):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            size = random.randint(5, 12)
            variation = random.randint(-7, 7) * params["texture"]
            for i in range(-size, size):
                for j in range(-size, size):
                    dist = np.sqrt(i*i + j*j)
                    if dist <= size:
                        # Sinusoidal modulation imitates parchment grain direction.
                        grain_factor = 1 + 0.3 * np.sin(j * 0.5) * np.cos(i * 0.3)
                        if dist <= size * grain_factor:
                            yi, xi = y + i, x + j
                            if 0 <= yi < height and 0 <= xi < width:
                                background[yi, xi, :] = np.clip(background[yi, xi, :] + variation, 0, 255)
        noise = np.random.randint(0, int(8 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)

    return Image.fromarray(background)
129
+
backend/app/services/synthetic/config.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration parameters for the Synthetic Text Generator
3
+ """
4
+
5
# Default knobs for the synthetic generator. Intensity-style values are
# 0.0-1.0 multipliers unless noted otherwise; callers typically merge partial
# overrides on top of this dict.
ENHANCED_DEFAULT_PARAMS = {
    # Canvas size (px) and number of base images sampled per run.
    'width': 400,
    'height': 320,
    'base_images': 1,

    # Font location.
    # NOTE(review): the repo's content/static ships NotoSansOriya-*.ttf
    # variants; confirm the Condensed file below actually exists there.
    'font_dir': './content/static',
    'font': 'NotoSansOriya_Condensed-Regular.ttf',

    # Background degradation strengths.
    'noise': 0.7,
    'aging': 0.6,
    'texture': 0.7,
    'stains': 0.6,
    'stain_intensity': 0.5,

    # Text layout jitter.
    'word_position': 0.6,
    'ink_color': 0.5,
    'line_spacing': 0.4,
    'baseline': 0.3,
    'word_angle': 0.0,

    # Classic post-processing transforms.
    'apply_transforms': True,
    'all_transforms': False,
    'rotation_max': 5.0,          # degrees
    'brightness_var': 0.2,
    'contrast_var': 0.2,
    'noise_min': 0.01,
    'noise_max': 0.05,
    'blur_min': 0.5,
    'blur_max': 1.0,

    # Advanced physical effects (folds, bleed, optics, scanner).
    'fold_intensity': 0.3,
    'bleed_intensity': 0.3,
    'bleed_radius': 3,            # px
    'corner_displacement': 20,    # px, perspective warp
    'morph_operation': 'mixed',
    'morph_kernel_size': 3,
    'aging_intensity': 0.5,
    'fiber_density': 0.5,
    'enable_advanced_effects': True,
    'advanced_effect_probability': 0.7,
    'shadow_angle': 45,           # degrees
    'shadow_intensity': 0.4,
    'lens_distortion_strength': 0.2,
    'scanner_artifacts': True,
    'compression_quality': 85,    # JPEG quality for scanner simulation
    'fold_probability': 0.4,
    'crease_probability': 0.3,
    'perspective_probability': 0.5,
    'shadow_probability': 0.6,

    # Runtime behaviour.
    'use_multiprocessing': False,
    'num_processes': 4,
    'enable_caching': True,
    'debug_mode': False,
    'image_dir': ''               # optional folder of real background images
    }
61
+
backend/app/services/synthetic/core.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core module containing main generation functions
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import logging
8
+ from typing import Dict, List, Optional
9
+ from PIL import Image
10
+ from .config import ENHANCED_DEFAULT_PARAMS
11
+ from .text_renderer import render_enhanced_sanskrit
12
+ from .transformations import (
13
+ apply_enhanced_postprocessing,
14
+ create_comprehensive_effect_combinations,
15
+ apply_systematic_postprocessing,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
def generate_enhanced_sanskrit_samples(
    text: str,
    font_path: str = None,
    output_dir: str = None,
    params: Dict = None,
) -> Optional[List[Image.Image]]:
    """Render `text` on randomly sampled paper styles, optionally with transforms.

    Args:
        text: The text to render.
        font_path: Explicit font file; defaults to params font_dir/font.
        output_dir: When given, images are saved there and the function
            returns None; when None, the generated images are returned.
        params: Partial overrides merged on top of ENHANCED_DEFAULT_PARAMS.

    Returns:
        List of PIL images when output_dir is None, otherwise None
        (images were persisted to disk). Returns [] / None respectively
        when the font file is missing.
    """
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    if font_path is None:
        font_path = os.path.join(params['font_dir'], params['font'])

    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        # Match the normal return contract for each mode.
        return [] if output_dir is None else None

    styles = ["lined_paper", "old_paper", "birch", "parchment"]

    # Per-style ink colour (RGB).
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    width, height = params['width'], params['height']
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Sample styles with replacement, then count occurrences per style.
    sampled_styles = random.choices(styles, k=params['base_images'])
    style_counts = {style: sampled_styles.count(style) for style in styles}
    logger.info(f"Randomly selected styles: {style_counts}")

    base_images = []

    for style, count in style_counts.items():
        for i in range(count):
            font_size = random.randint(12, 18)
            output_path = (
                os.path.join(output_dir, f"enhanced_sanskrit_{style}_{i+1}.png")
                if output_dir
                else None
            )

            img = render_enhanced_sanskrit(
                text=text,
                font_path=font_path,
                output_path=output_path,
                width=width,
                height=height,
                font_size=font_size,
                style=style,
                ink_color=ink_colors[style],
                params=params,
            )

            if img:
                base_images.append(img)
                # Transformed variants are only produced when persisting to disk.
                if params['apply_transforms'] and output_dir:
                    base_filename = f"enhanced_sanskrit_{style}_{i+1}"
                    transformed_images = apply_enhanced_postprocessing(
                        img, output_dir, base_filename, params
                    )
                    # Index 0 is the untransformed base; keep variants only.
                    base_images.extend(transformed_images[1:])

    return base_images if output_dir is None else None
89
+
90
+
91
def generate_comprehensive_dataset(
    text: str, font_path: str = None, output_dir: str = None, params: Dict = None
) -> List[Image.Image]:
    """Render `text` on every base style and expand each with all effect combos.

    Args:
        text: The text to render.
        font_path: Explicit font file; defaults to params font_dir/font.
        output_dir: Optional folder; base images and variants are saved there.
        params: Partial overrides merged on top of ENHANCED_DEFAULT_PARAMS.

    Returns:
        All generated PIL images (base images plus every effect-combination
        variant); [] when the font file is missing.
    """
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    if font_path is None:
        font_path = os.path.join(params['font_dir'], params['font'])

    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        return []

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Pre-built list of effect combinations applied to every base image.
    effect_combinations = create_comprehensive_effect_combinations()

    styles = ["lined_paper", "old_paper", "birch", "parchment"]
    # Per-style ink colour (RGB).
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    width, height = params['width'], params['height']
    all_generated_images: List[Image.Image] = []

    logger.info(
        f"Generating comprehensive dataset with {len(effect_combinations)} effect combinations"
    )

    for style in styles:
        font_size = random.randint(14, 18)
        output_path = os.path.join(output_dir, f"base_{style}.png") if output_dir else None

        base_image = render_enhanced_sanskrit(
            text=text,
            font_path=font_path,
            output_path=output_path,
            width=width,
            height=height,
            font_size=font_size,
            style=style,
            ink_color=ink_colors[style],
            params=params,
        )

        if base_image:
            all_generated_images.append(base_image)
            for combo_idx, effect_combo in enumerate(effect_combinations):
                base_filename = f"comprehensive_{style}_{combo_idx:03d}"
                enhanced_images = apply_systematic_postprocessing(
                    base_image, output_dir, base_filename, params, effect_combo
                )
                # Index 0 is the unmodified base; keep variants only.
                all_generated_images.extend(enhanced_images[1:])

    logger.info(f"Total images generated: {len(all_generated_images)}")
    return all_generated_images
+
154
+
155
def generate_ultra_realistic_samples(
    text: str, output_dir: str = None, style_focus: str = None, params: Dict = None
) -> List[Image.Image]:
    """Generate heavily degraded ("ultra realistic") samples of `text`.

    Args:
        text: The text to render.
        output_dir: Optional folder where variants are saved.
        style_focus: Restrict generation to one style; all four when None.
        params: Partial overrides merged on top of ENHANCED_DEFAULT_PARAMS.

    Returns:
        All generated variant images; [] when the font file is missing.
    """
    # Merge user overrides onto the defaults, mirroring the sibling
    # generators in this module; previously a partial params dict would
    # raise KeyError on missing keys such as 'font_dir'.
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    # Crank the degradation knobs above the defaults for extra realism.
    ultra_realistic_params = {
        **params,
        'fold_intensity': 0.4,
        'bleed_intensity': 0.35,
        'shadow_intensity': 0.5,
        'lens_distortion_strength': 0.15,
        'aging_intensity': 0.7,
        'fiber_density': 0.6,
        'texture': 0.8,
        'noise': 0.6,
        'stains': 0.7,
        'stain_intensity': 0.6,
    }

    font_path = os.path.join(ultra_realistic_params['font_dir'], ultra_realistic_params['font'])
    # Fail fast like the sibling generators when the font is missing.
    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        return []

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Hand-picked effect stacks that read as heavily handled documents.
    ultra_combinations = [
        ["fold_crease", "ink_bleed", "shadow_cast", "scanner_artifacts"],
        ["perspective", "morphological", "lens_distortion", "washboard"],
        ["cylinder", "scanner_artifacts", "lens_distortion", "shadow_cast"],
        ["fold_crease", "ink_bleed", "morphological", "perspective"],
        [
            "fold_crease",
            "ink_bleed",
            "perspective",
            "shadow_cast",
            "morphological",
            "scanner_artifacts",
            "lens_distortion",
        ],
        ["perspective", "lens_distortion", "shadow_cast", "cylinder"],
        ["washboard", "ink_bleed", "morphological", "fold_crease"],
    ]

    styles = ["lined_paper", "old_paper", "birch", "parchment"] if not style_focus else [style_focus]
    # Per-style ink colour (RGB).
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    all_images: List[Image.Image] = []

    for style in styles:
        base_image = render_enhanced_sanskrit(
            text=text,
            font_path=font_path,
            output_path=None,
            width=ultra_realistic_params['width'],
            height=ultra_realistic_params['height'],
            font_size=random.randint(14, 18),
            style=style,
            ink_color=ink_colors[style],
            params=ultra_realistic_params,
        )

        if base_image:
            for combo_idx, effect_combo in enumerate(ultra_combinations):
                base_filename = f"ultra_realistic_{style}_{combo_idx:02d}"
                enhanced_images = apply_systematic_postprocessing(
                    base_image, output_dir, base_filename, ultra_realistic_params, effect_combo
                )
                # Index 0 is the unmodified base; keep variants only.
                all_images.extend(enhanced_images[1:])

    return all_images
230
+
backend/app/services/synthetic/effects.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Advanced image effects for synthetic text generation
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import random
8
+ import logging
9
+ from typing import List, Tuple
10
+ from noise import pnoise2
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class EffectPlugin:
    """Base class for pluggable image effects.

    Subclasses override apply() (and optionally validate_params()) to
    implement a named, parameterised transformation on an RGB array.
    """

    def __init__(self, name: str, params: dict):
        # Store the effect identity and its configuration, then run the
        # subclass validation hook (a no-op by default).
        self.name = name
        self.params = params
        self.validate_params()

    def apply(self, image: np.ndarray) -> np.ndarray:
        """Apply the effect to *image*; must be overridden by subclasses."""
        raise NotImplementedError

    def validate_params(self):
        """Optional hook for subclasses to validate self.params."""
        pass
26
+
27
+
28
class AdvancedImageEffects:
    """Namespace of static, best-effort image degradation effects.

    Methods take and return uint8 RGB numpy arrays; on any internal error
    they log and return the input unchanged.
    """

    @staticmethod
    def generate_perlin_noise(width: int, height: int, scale: float = 0.1, octaves: int = 4) -> np.ndarray:
        """Return a (height, width) float map of 2-D Perlin noise (roughly [-1, 1]).

        NOTE(review): pure-Python double loop of pnoise2 calls -- O(w*h);
        slow for large canvases.
        """
        noise_map = np.zeros((height, width))
        for i in range(height):
            for j in range(width):
                noise_map[i][j] = pnoise2(i * scale, j * scale, octaves=octaves)
        return noise_map

    @staticmethod
    def simulate_paper_fiber_texture(width: int, height: int, fiber_density: float = 0.5) -> np.ndarray:
        """Build a 3-channel uint8 fibre texture; random speckle as fallback."""
        try:
            # Coarse + fine Perlin layers, rescaled to [0, 20*density].
            base_texture = AdvancedImageEffects.generate_perlin_noise(width, height, 0.02, 4)
            fine_texture = AdvancedImageEffects.generate_perlin_noise(width, height, 0.1, 2)
            combined = base_texture * 0.7 + fine_texture * 0.3
            combined = ((combined + 1) / 2) * fiber_density * 20
            texture = np.stack([combined, combined, combined], axis=2)
            return texture.astype(np.uint8)
        except Exception as e:
            logger.warning(f"Failed to generate Perlin noise texture: {e}")
            return np.random.randint(0, int(20 * fiber_density), (height, width, 3), dtype=np.uint8)

    @staticmethod
    def simulate_fold_crease(image: np.ndarray, fold_lines: List[Tuple], fold_intensity: float = 0.5) -> np.ndarray:
        """Darken the image along each (x1, y1, x2, y2) fold line with a
        Gaussian profile, plus a one-sided shadow across the line."""
        try:
            height, width = image.shape[:2]
            result = image.copy()
            for fold_line in fold_lines:
                y_coords, x_coords = np.ogrid[:height, :width]
                x1, y1, x2, y2 = fold_line
                line_length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
                if line_length > 0:
                    # Perpendicular distance of every pixel to the fold line.
                    distances = np.abs((y2 - y1) * x_coords - (x2 - x1) * y_coords + x2 * y1 - y2 * x1) / line_length
                    fold_width = min(width, height) * 0.1
                    fold_profile = np.exp(-0.5 * (distances / fold_width)**2)
                    fold_effect = fold_profile * fold_intensity * 40
                    # Sign of the cross product selects one side of the line.
                    shadow_mask = (y_coords - y1) * (x2 - x1) - (x_coords - x1) * (y2 - y1) > 0
                    shadow_effect = fold_profile * shadow_mask * fold_intensity * 20
                    result = result.astype(np.float32)
                    result[:, :, 0] -= fold_effect + shadow_effect
                    result[:, :, 1] -= fold_effect + shadow_effect
                    result[:, :, 2] -= fold_effect + shadow_effect
                    result = np.clip(result, 0, 255).astype(np.uint8)
            return result
        except Exception as e:
            logger.error(f"Error in fold/crease simulation: {e}")
            return image

    @staticmethod
    def simulate_ink_bleed(image: np.ndarray, bleed_intensity: float = 0.3, bleed_radius: int = 3) -> np.ndarray:
        """Darken a blurred dilation halo around dark (text) pixels."""
        try:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            # Pixels darker than 200 are treated as ink.
            _, text_mask = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
            kernel_size = max(1, int(bleed_radius * 2 + 1))
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
            bleeding_mask = cv2.dilate(text_mask, kernel, iterations=1)
            bleed_effect = cv2.GaussianBlur(bleeding_mask.astype(np.float32), (kernel_size, kernel_size), 0)
            bleed_effect = bleed_effect * bleed_intensity / 255.0
            result = image.astype(np.float32)
            for c in range(3):
                result[:, :, c] = result[:, :, c] * (1 - 0.5 * bleed_effect)
            return np.clip(result, 0, 255).astype(np.uint8)
        except Exception as e:
            logger.error(f"Error in ink bleed simulation: {e}")
            return image

    @staticmethod
    def apply_perspective_distortion(image: np.ndarray, corner_displacement: int = 20) -> np.ndarray:
        """Warp the image by randomly displacing its four corners
        (clamped to +/-10% outside the canvas)."""
        try:
            height, width = image.shape[:2]
            src_points = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
            dst_points = src_points.copy()
            for i in range(4):
                dst_points[i][0] += random.randint(-corner_displacement, corner_displacement)
                dst_points[i][1] += random.randint(-corner_displacement, corner_displacement)
            dst_points[:, 0] = np.clip(dst_points[:, 0], -width*0.1, width*1.1)
            dst_points[:, 1] = np.clip(dst_points[:, 1], -height*0.1, height*1.1)
            matrix = cv2.getPerspectiveTransform(src_points, dst_points)
            result = cv2.warpPerspective(
                image, matrix, (width, height), borderMode=cv2.BORDER_REPLICATE
            )
            return result
        except Exception as e:
            logger.error(f"Error in perspective distortion: {e}")
            return image

    @staticmethod
    def apply_shadow_effects(image: np.ndarray, shadow_angle: float = 45, shadow_intensity: float = 0.4) -> np.ndarray:
        """Apply a linear brightness gradient across the image along shadow_angle."""
        try:
            height, width = image.shape[:2]
            result = image.copy().astype(np.float32)
            angle_rad = np.radians(shadow_angle)
            x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
            shadow_factor = (np.cos(angle_rad) * x_coords / width + np.sin(angle_rad) * y_coords / height)
            shadow_factor = np.clip(shadow_factor, 0, 1)
            shadow_effect = 1 - shadow_factor * shadow_intensity
            for c in range(3):
                result[:, :, c] *= shadow_effect
            return np.clip(result, 0, 255).astype(np.uint8)
        except Exception as e:
            logger.error(f"Error in shadow effects: {e}")
            return image

    @staticmethod
    def apply_morphological_operations(image: np.ndarray, operation: str = 'mixed', kernel_size: int = 3) -> np.ndarray:
        """Apply one morphological op on the grayscale image and return it as RGB.

        'mixed' (or any unknown value) recurses with a randomly chosen op.
        NOTE(review): output is grayscale replicated to 3 channels -- colour
        information is discarded.
        """
        try:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
            if operation == 'erosion':
                processed = cv2.erode(gray, kernel, iterations=1)
            elif operation == 'dilation':
                processed = cv2.dilate(gray, kernel, iterations=1)
            elif operation == 'opening':
                processed = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
            elif operation == 'closing':
                processed = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
            else:
                ops = ['erosion', 'dilation', 'opening', 'closing']
                import random as _r
                chosen = _r.choice(ops)
                return AdvancedImageEffects.apply_morphological_operations(image, chosen, kernel_size)
            return cv2.cvtColor(processed, cv2.COLOR_GRAY2RGB)
        except Exception as e:
            logger.error(f"Error in morphological operations: {e}")
            return image

    @staticmethod
    def simulate_scanner_artifacts(image: np.ndarray, compression_quality: int = 85) -> np.ndarray:
        """Add horizontal scan lines, dust specks, and a JPEG round-trip."""
        try:
            height, width = image.shape[:2]
            result = image.copy()
            # Darkened horizontal lines at random spacing.
            for y in range(0, height, random.randint(8, 15)):
                intensity = random.randint(5, 15)
                if y < height:
                    result[y, :, :] = np.clip(result[y, :, :] - intensity, 0, 255)
            # Small dark dust squares.
            dust_count = random.randint(3, 8)
            for _ in range(dust_count):
                x = random.randint(0, width - 5)
                y = random.randint(0, height - 5)
                size = random.randint(2, 5)
                dust_intensity = random.randint(20, 40)
                result[y:y+size, x:x+size, :] = np.clip(result[y:y+size, x:x+size, :] - dust_intensity, 0, 255)
            # JPEG encode/decode round-trip introduces block artefacts.
            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compression_quality]
            _, encimg = cv2.imencode('.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR), encode_param)
            result = cv2.imdecode(encimg, cv2.IMREAD_COLOR)
            result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
            return result
        except Exception as e:
            logger.error(f"Error in scanner artifacts: {e}")
            return image

    @staticmethod
    def apply_lens_distortion(image: np.ndarray, strength: float = 0.2) -> np.ndarray:
        """Apply radial (barrel-style) distortion centred on the image."""
        try:
            height, width = image.shape[:2]
            center_x, center_y = width // 2, height // 2
            y_coords, x_coords = np.ogrid[:height, :width]
            distances = np.sqrt((x_coords - center_x)**2 + (y_coords - center_y)**2)
            max_distance = np.sqrt(center_x**2 + center_y**2)
            normalized_distances = distances / max_distance
            # Quadratic falloff: pixels further from centre are pulled inward more.
            distortion_factor = 1 + strength * normalized_distances**2
            map_x = ((x_coords - center_x) / distortion_factor + center_x).astype(np.float32)
            map_y = ((y_coords - center_y) / distortion_factor + center_y).astype(np.float32)
            result = cv2.remap(image, map_x, map_y, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
            return result
        except Exception as e:
            logger.error(f"Error in lens distortion: {e}")
            return image
196
+
197
+
198
def generate_random_fold_lines(image_size: Tuple[int, int], num_folds: int = None) -> List[Tuple]:
    """Produce random (x1, y1, x2, y2) endpoints for fold/crease simulation.

    Args:
        image_size: (width, height) bounds for the endpoints (inclusive).
        num_folds: Number of lines; a random 1-3 when None.
    """
    width, height = image_size
    count = random.randint(1, 3) if num_folds is None else num_folds
    return [
        (
            random.randint(0, width),
            random.randint(0, height),
            random.randint(0, width),
            random.randint(0, height),
        )
        for _ in range(count)
    ]
210
+
211
+
212
def safe_apply_effect(effect_func, image: np.ndarray, effect_name: str) -> np.ndarray:
    """Run *effect_func* on *image*; on any error, log and return the input unchanged."""
    try:
        result = effect_func(image)
    except Exception as exc:
        logger.error(f"Error applying {effect_name}: {exc}")
        return image
    return result
218
+
backend/app/services/synthetic/huggingface_processor.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face dataset processor for downloading datasets and generating synthetic text images
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ import pandas as pd
8
+ from typing import Dict, List, Optional
9
+ from urllib.parse import urlparse
10
+ import requests
11
+ from PIL import Image
12
+
13
+ from datasets import load_dataset
14
+ import datasets
15
+
16
+ from .config import ENHANCED_DEFAULT_PARAMS
17
+ from .text_renderer import render_enhanced_sanskrit
18
+ from .transformations import apply_enhanced_postprocessing
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class HuggingFaceDatasetProcessor:
    """Turn a text dataset (Hugging Face hub, URL, or local CSV) into synthetic images.

    Each row's text is rendered onto a styled paper background via
    ``render_enhanced_sanskrit``, optionally post-processed, written under
    ``<output_dir>/images/``, and indexed in ``<output_dir>/dataset.csv``.
    """

    def __init__(self, output_dir: str = "hf_dataset_output", params: Dict = None):
        """Create output folders and resolve rendering parameters.

        Args:
            output_dir: Root directory for generated images and the CSV index.
            params: Rendering/augmentation parameters; a copy of
                ENHANCED_DEFAULT_PARAMS is used when omitted so later
                mutation does not affect the module-level defaults.
        """
        self.output_dir = output_dir
        self.params = params if params else ENHANCED_DEFAULT_PARAMS.copy()
        self.image_dir = os.path.join(output_dir, "images")
        self.csv_path = os.path.join(output_dir, "dataset.csv")
        os.makedirs(self.image_dir, exist_ok=True)
        # RGB ink colour paired with each supported background style.
        self.ink_colors = {
            "lined_paper": (60, 30, 10),
            "old_paper": (20, 20, 20),
            "birch": (50, 20, 10),
            "parchment": (10, 10, 10),
        }

    def load_huggingface_dataset(self, dataset_name: str, config_name: str = None, split: str = None, streaming: bool = False):
        """Load a dataset from the Hugging Face hub.

        Returns the streaming dataset object when ``streaming`` is True,
        otherwise a pandas DataFrame — preferring the requested split, then
        'train', then the first available split. Returns None on any failure.
        """
        try:
            dataset = load_dataset(dataset_name, config_name, split=split, streaming=streaming)
            if streaming:
                return dataset
            else:
                if isinstance(dataset, datasets.DatasetDict):
                    if split:
                        df = dataset[split].to_pandas()
                    elif 'train' in dataset:
                        df = dataset['train'].to_pandas()
                    else:
                        # Fall back to whichever split the dataset exposes first.
                        first_split = list(dataset.keys())[0]
                        df = dataset[first_split].to_pandas()
                else:
                    df = dataset.to_pandas()
                return df
        except Exception as e:
            logger.error(f"Error loading Hugging Face dataset: {e}")
            return None

    def download_dataset_from_url(self, url: str, output_file: str = "dataset.csv") -> bool:
        """Download a CSV file from *url* into the output directory.

        Hugging Face dataset page URLs are rewritten to their raw-file
        equivalents; any other URL is fetched as-is. Returns True on success.
        """
        try:
            if "huggingface.co/datasets" in url:
                parsed = urlparse(url)
                dataset_path = parsed.path.strip('/')
                if "/blob/main/" in url:
                    # Web-UI file link -> raw file link.
                    raw_url = url.replace("/blob/main/", "/raw/main/")
                elif "/tree/main" in url:
                    # Directory link -> assume a top-level dataset.csv.
                    raw_url = url.replace("/tree/main", "/raw/main/dataset.csv")
                else:
                    raw_url = f"https://huggingface.co/{dataset_path}/raw/main/dataset.csv"
            else:
                raw_url = url
            # NOTE(review): no timeout is set on this request — consider adding one.
            response = requests.get(raw_url, stream=True)
            response.raise_for_status()
            file_path = os.path.join(self.output_dir, output_file)
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            return True
        except Exception as e:
            logger.error(f"Error downloading dataset: {e}")
            return False

    def load_dataset(self, file_path: str, text_column: str) -> Optional[pd.DataFrame]:
        """Read a CSV, trying several encodings, and drop empty text rows.

        Returns the cleaned DataFrame, or None (with a logged error) when no
        encoding works or *text_column* is missing.
        """
        try:
            encodings = ['utf-8', 'iso-8859-1', 'windows-1252', 'utf-16']
            df = None
            for encoding in encodings:
                try:
                    df = pd.read_csv(file_path, encoding=encoding)
                    break
                except UnicodeDecodeError:
                    continue
            if df is None:
                raise Exception("Could not load dataset with any supported encoding")
            if text_column not in df.columns:
                raise Exception(f"Column '{text_column}' not found. Available columns: {list(df.columns)}")
            initial_rows = len(df)  # NOTE(review): currently unused.
            df = df.dropna(subset=[text_column])
            df = df[df[text_column].str.strip() != ""]
            return df
        except Exception as e:
            logger.error(f"Error loading dataset: {e}")
            return None

    def generate_images_from_dataset(self, dataset_df: pd.DataFrame, text_column: str, max_samples: int = None) -> List[Dict]:
        """Render one image per dataset row and return per-row metadata dicts.

        Background styles rotate round-robin over the row index. Rows whose
        text is empty, whose rendering fails, or that raise an exception are
        skipped (and logged) rather than aborting the whole batch.
        """
        results = []
        if max_samples and max_samples < len(dataset_df):
            dataset_df = dataset_df.head(max_samples)
        styles = ["lined_paper", "old_paper", "birch", "parchment"]
        for idx, row in dataset_df.iterrows():
            try:
                text = str(row[text_column]).strip()
                if not text:
                    continue
                style = styles[idx % len(styles)]
                base_filename = f"text_image_{idx:06d}"
                image_filename = f"{base_filename}.png"
                image_path = os.path.join(self.image_dir, image_filename)
                img = render_enhanced_sanskrit(
                    text=text,
                    font_path=os.path.join(self.params['font_dir'], self.params['font']),
                    output_path=None,
                    width=self.params['width'],
                    height=self.params['height'],
                    font_size=14,
                    style=style,
                    ink_color=self.ink_colors[style],
                    params=self.params,
                )
                if img is None:
                    continue
                if self.params.get('apply_transforms', True):
                    # Keep only the last (combined/most-transformed) variant when
                    # any transform was produced; otherwise keep the original.
                    transformed_images = apply_enhanced_postprocessing(img, None, base_filename, self.params)
                    final_img = transformed_images[-1] if len(transformed_images) > 1 else img
                else:
                    final_img = img
                final_img.save(image_path)
                result = {
                    'row_index': idx,
                    'image_path': os.path.relpath(image_path, self.output_dir),
                    'text': text,
                    'style': style,
                    'image_filename': image_filename,
                }
                # Carry any extra source columns through to the output record.
                for col in dataset_df.columns:
                    if col != text_column:
                        result[col] = row[col]
                results.append(result)
            except Exception as e:
                logger.error(f"Error processing row {idx}: {e}")
                continue
        return results

    def save_results_csv(self, results: List[Dict], additional_info: Dict = None):
        """Write the per-row records to dataset.csv (and optional metadata.txt).

        Important columns are moved to the front of the CSV; *additional_info*
        key/value pairs, when given, are written to a sidecar metadata file.
        """
        try:
            if not results:
                return
            df = pd.DataFrame(results)
            important_cols = ['image_path', 'text', 'style', 'image_filename']
            other_cols = [col for col in df.columns if col not in important_cols]
            df = df[important_cols + other_cols]
            df.to_csv(self.csv_path, index=False, encoding='utf-8')
            if additional_info:
                metadata_path = os.path.join(self.output_dir, "metadata.txt")
                with open(metadata_path, 'w', encoding='utf-8') as f:
                    f.write("Dataset Processing Metadata\n")
                    f.write("=" * 30 + "\n")
                    for key, value in additional_info.items():
                        f.write(f"{key}: {value}\n")
        except Exception as e:
            logger.error(f"Error saving results: {e}")

    def process_huggingface_dataset(self, dataset_identifier: str, text_column: str, max_samples: int = None, config_name: str = None, split: str = None) -> bool:
        """End-to-end pipeline for a hub dataset name or a downloadable URL.

        Loads the dataset (hub identifier first, then URL download fallback),
        renders images, and saves the CSV index plus metadata. Returns True
        only when at least one row was processed successfully.
        """
        try:
            df = None
            if not dataset_identifier.startswith("http"):
                df = self.load_huggingface_dataset(dataset_identifier, config_name=config_name, split=split)
            if df is None and dataset_identifier.startswith("http"):
                dataset_file = "downloaded_dataset.csv"
                if self.download_dataset_from_url(dataset_identifier, dataset_file):
                    dataset_path = os.path.join(self.output_dir, dataset_file)
                    df = self.load_dataset(dataset_path, text_column)
            if df is None:
                return False
            # Hub-loaded frames have not been column-checked yet, so verify here.
            if text_column not in df.columns:
                return False
            results = self.generate_images_from_dataset(df, text_column, max_samples)
            if not results:
                return False
            additional_info = {
                "dataset_identifier": dataset_identifier,
                "config_name": config_name,
                "split": split,
                "text_column": text_column,
                "original_rows": len(df),
                "processed_rows": len(results),
                "max_samples": max_samples or "all",
                "output_directory": self.output_dir,
                "image_directory": self.image_dir,
            }
            self.save_results_csv(results, additional_info)
            return True
        except Exception as e:
            logger.error(f"Error in dataset processing workflow: {e}")
            return False

    def process_local_csv(self, csv_path: str, text_column: str, max_samples: int = None) -> bool:
        """End-to-end pipeline for a CSV already on disk.

        Mirrors ``process_huggingface_dataset`` but skips download/hub logic.
        Returns True only when at least one row was processed successfully.
        """
        try:
            df = self.load_dataset(csv_path, text_column)
            if df is None:
                return False
            results = self.generate_images_from_dataset(df, text_column, max_samples)
            if not results:
                return False
            additional_info = {
                "source_file": csv_path,
                "text_column": text_column,
                "original_rows": len(df),
                "processed_rows": len(results),
                "max_samples": max_samples or "all",
                "output_directory": self.output_dir,
                "image_directory": self.image_dir,
            }
            self.save_results_csv(results, additional_info)
            return True
        except Exception as e:
            logger.error(f"Error processing local CSV: {e}")
            return False
227
+ return False
228
+
backend/app/services/synthetic/text_renderer.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text rendering module for Sanskrit/Oriya text with various effects
3
+ """
4
+
5
+ import os
6
+ import math
7
+ import random
8
+ import logging
9
+ from typing import Dict, Tuple, Optional
10
+ import numpy as np
11
+ from PIL import Image, ImageDraw, ImageFont
12
+ from .backgrounds import create_enhanced_background
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def render_enhanced_sanskrit(
    text: str,
    font_path: str,
    output_path: str,
    width: int,
    height: int,
    font_size: int,
    style: str,
    ink_color: Tuple[int, int, int],
    params: Dict,
) -> Optional[Image.Image]:
    """Render *text* word-by-word onto a styled background with jitter.

    Words are wrapped to the available width, each line is centred, and
    every word gets small random offsets, ink-colour variation, and
    (optionally) per-word rotation to mimic handwriting/print irregularity.

    Args:
        text: Source text; newlines are collapsed to spaces before wrapping.
        font_path: Path to a TrueType font file.
        output_path: Where to save the result; pass None to skip saving.
        width, height: Canvas size in pixels.
        font_size: Font size in points.
        style: Background style name understood by create_enhanced_background.
        ink_color: Base RGB ink colour; per-word variation is added on top.
        params: Jitter magnitudes — keys "baseline", "word_position",
            "ink_color", "word_angle", "line_spacing" are read here.

    Returns:
        The rendered PIL image, or None if font loading/rendering failed.
    """
    img = create_enhanced_background(width, height, style, params)
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.truetype(font_path, font_size)
        words = text.strip().replace('\n', ' ').split()
        # Random top margin so pages don't all start at the same height.
        y_position = random.randint(25, 75)
        margin = 25
        available_width = width - 2 * margin
        space_width = draw.textlength(" ", font=font)

        # Greedy word wrap: accumulate words until the line would overflow.
        current_line = []
        current_line_width = 0
        all_lines = []
        for word in words:
            word_width = draw.textlength(word, font=font)
            if current_line and current_line_width + space_width + word_width > available_width:
                all_lines.append(current_line)
                current_line = [word]
                current_line_width = word_width
            else:
                if current_line:
                    current_line_width += space_width + word_width
                else:
                    current_line_width = word_width
                current_line.append(word)
        if current_line:
            all_lines.append(current_line)

        for line in all_lines:
            line_text = " ".join(line)
            line_width = draw.textlength(line_text, font=font)
            # Centre the line horizontally; jitter its baseline vertically.
            x_position = (width - line_width) // 2
            baseline_offset = random.randint(-2, 2) * params["baseline"]
            y_line_position = y_position + baseline_offset
            # Stop rendering once the next line would spill past the bottom margin.
            if y_line_position + font_size > height - margin:
                break
            x_word_position = x_position
            for word in line:
                # Per-word positional jitter and slight ink-colour variation.
                word_x_offset = int(random.uniform(-1.5, 1.5) * params["word_position"])
                word_y_offset = int(random.uniform(-1, 1) * params["word_position"])
                color_variation = int(random.randint(-3, 3) * params["ink_color"])
                word_color = (
                    np.clip(ink_color[0] + color_variation, 0, 255),
                    np.clip(ink_color[1] + color_variation, 0, 255),
                    np.clip(ink_color[2] + color_variation, 0, 255),
                )
                word_width = draw.textlength(word, font=font)
                word_height = font_size * 1.2
                if params["word_angle"] > 0:
                    # Rotate the word on an oversized transparent canvas so the
                    # rotated glyphs are never clipped, then paste it back.
                    word_angle = random.uniform(-2, 2) * params["word_angle"]
                    diagonal = math.sqrt(word_width**2 + word_height**2)
                    padding = int(diagonal * 0.5)
                    temp_width = int(diagonal + 2 * padding)
                    temp_height = int(diagonal + 2 * padding)
                    txt_img = Image.new('RGBA', (temp_width, temp_height), (0, 0, 0, 0))
                    txt_d = ImageDraw.Draw(txt_img)
                    center_x = temp_width // 2 - word_width // 2
                    center_y = temp_height // 2 - word_height // 2
                    txt_d.text((center_x, center_y), word, font=font, fill=word_color + (255,))
                    rotated = txt_img.rotate(
                        word_angle, resample=Image.BICUBIC, expand=0, center=(temp_width//2, temp_height//2)
                    )
                    paste_x = int(x_word_position + word_x_offset - padding)
                    paste_y = int(y_line_position + word_y_offset - padding)
                    # Use the rotated image as its own alpha mask.
                    img.paste(rotated, (paste_x, paste_y), rotated)
                else:
                    draw.text(
                        (x_word_position + word_x_offset, y_line_position + word_y_offset),
                        word, fill=word_color, font=font
                    )
                x_word_position += word_width + space_width
            # Jitter the line spacing around the nominal 1.2x font size.
            line_spacing_factor = 1.0 + (random.uniform(-0.1, 0.1) * params["line_spacing"])
            y_position += int(font_size * 1.2 * line_spacing_factor)

        if output_path is not None:
            img.save(output_path)
            logger.info(f"Saved rendered Sanskrit to {output_path}")

        return img

    except Exception as e:
        logger.error(f"Error rendering text with font {font_path}: {e}")
        return None
112
+
backend/app/services/synthetic/transformations.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Transformations module for geometric transformations and post-processing effects
3
+ """
4
+
5
+ import cv2
6
+ import os
7
+ import random
8
+ import logging
9
+ import itertools
10
+ from math import pi
11
+ from typing import List, Dict
12
+ import numpy as np
13
+ from PIL import Image, ImageEnhance, ImageFilter
14
+ from .effects import AdvancedImageEffects, generate_random_fold_lines, safe_apply_effect
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def cylindrical_edge_warp(pil_img: Image.Image, side: str = "left", strength: float = 0.6, warp_portion: float = 0.45) -> Image.Image:
    """Bend one vertical edge of the image as if wrapped around a cylinder.

    A strip of width ``warp_portion * width`` along the chosen edge is
    remapped using a cylinder of radius ``strip_width / strength``:
    horizontal positions are foreshortened (arc projection) and rows are
    scaled vertically toward the page midline. On failure the original
    image is returned unchanged.

    Args:
        pil_img: Input RGB PIL image.
        side: "left" warps the left strip; anything else warps the right.
        strength: Curvature; larger magnitude bends more (0 disables).
        warp_portion: Fraction of the width affected by the warp.
    """
    try:
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        h, w = img.shape[:2]
        W = int(warp_portion * w)  # width of the warped strip, in pixels
        R = W / strength if strength != 0 else 1e9  # cylinder radius; huge radius => no-op
        X, Y = np.meshgrid(np.arange(w), np.arange(h))
        map_x = X.astype(np.float32).copy()
        map_y = Y.astype(np.float32).copy()
        if side == "left":
            strip = X < W
            dx = W - X[strip]  # distance into the strip from its inner boundary
        else:
            strip = X > (w - W)
            dx = X[strip] - (w - W)
        theta = dx / R  # angle subtended on the cylinder surface
        # Horizontal foreshortening: the arc position R*sin(theta) replaces dx.
        displacement = R * np.sin(theta) - dx
        map_x[strip] += displacement
        # Vertical compression grows with theta, mimicking the strip tilting away.
        scale_y = np.cos(theta)
        map_y[strip] = (Y[strip] - h/2) / scale_y + h/2
        warped = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return Image.fromarray(cv2.cvtColor(warped, cv2.COLOR_BGR2RGB))
    except Exception as e:
        logger.error(f"Error in cylindrical warp: {e}")
        return pil_img
44
+
45
+
46
def washboard_warp(pil_img: Image.Image, amplitude: float = 8, wavelength: float = 120, phase: float = 0.0, decay_from_top: bool = True) -> Image.Image:
    """Displace rows along a horizontal sine wave ("washboard" paper ripple).

    Each column is shifted vertically by ``amplitude * sin(2*pi*x/wavelength
    + phase)``; when *decay_from_top* is True the shift fades linearly from
    full strength at the top row to 20% at the bottom. Returns the input
    unchanged on any failure.
    """
    try:
        bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        rows, cols = bgr.shape[:2]
        col_idx = np.arange(cols, dtype=np.float32)
        offsets = amplitude * np.sin(2*pi*col_idx / wavelength + phase)
        attenuation = (
            np.linspace(1, 0.2, rows, dtype=np.float32)[:, None]
            if decay_from_top
            else 1.0
        )
        grid_x, grid_y = np.meshgrid(col_idx, np.arange(rows, dtype=np.float32))
        grid_y += offsets * attenuation
        rippled = cv2.remap(bgr, grid_x, grid_y, cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return Image.fromarray(cv2.cvtColor(rippled, cv2.COLOR_BGR2RGB))
    except Exception as e:
        logger.error(f"Error in washboard warp: {e}")
        return pil_img
63
+
64
+
65
def apply_enhanced_postprocessing(original_image: Image.Image, output_dir: str, base_filename: str, params: Dict) -> List[Image.Image]:
    """Apply a randomized set of degradations to *original_image*.

    Builds a pool of candidate transforms (photometric jitter, geometric
    warps, and — gated by probabilities in *params* — advanced paper/scan
    effects), then applies either all of them individually or a random
    sample of up to five. When more than one transform is selected, a final
    image chaining all of them in sequence is also produced. Images are
    saved to *output_dir* when it is not None.

    Returns:
        ``[original_image]`` followed by one image per applied transform,
        plus (when >1 transform was selected) one combined image — so the
        last element is the most heavily transformed variant.
    """
    all_images = [original_image]
    transforms = []

    def rotate_image(img, angle):
        # Fill corners exposed by the rotation with the image's mean colour
        # instead of black.
        bg_color = tuple(np.array(img).mean(axis=(0, 1)).astype(int))
        return img.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=bg_color)

    def adjust_brightness(img, factor):
        enhancer = ImageEnhance.Brightness(img)
        return enhancer.enhance(factor)

    def adjust_contrast(img, factor):
        enhancer = ImageEnhance.Contrast(img)
        return enhancer.enhance(factor)

    def add_noise(img, intensity):
        # Additive Gaussian noise, clipped back into valid 8-bit range.
        img_array = np.array(img).astype(np.float32)
        noise = np.random.normal(0, intensity * 255, img_array.shape)
        noisy_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_array)

    def blur_image(img, radius):
        return img.filter(ImageFilter.GaussianBlur(radius=radius))

    # Baseline photometric jitter, each parameterized from *params*.
    transforms.append(("rotate", lambda img: rotate_image(img, random.uniform(-params["rotation_max"], params["rotation_max"]))))
    transforms.append(("brightness", lambda img: adjust_brightness(img, random.uniform(1.0-params["brightness_var"], 1.0+params["brightness_var"]))))
    transforms.append(("contrast", lambda img: adjust_contrast(img, random.uniform(1.0-params["contrast_var"], 1.0+params["contrast_var"]))))
    transforms.append(("noise", lambda img: add_noise(img, random.uniform(params["noise_min"], params["noise_max"]))))
    transforms.append(("blur", lambda img: blur_image(img, random.uniform(params["blur_min"], params["blur_max"]))))

    # Geometric page warps (always candidates).
    transforms.append(("washboard", lambda img: washboard_warp(img, amplitude=random.uniform(6, 12), wavelength=random.uniform(90, 150), phase=random.uniform(0, 2*pi), decay_from_top=random.choice([True, False]))))
    transforms.append(("cylinder", lambda img: cylindrical_edge_warp(img, side=random.choice(["left", "right"]), strength=random.uniform(0.4, 0.8) * random.choice([1, -1]), warp_portion=random.uniform(0.35, 0.5))))

    # Advanced effects are each added to the pool only with some probability,
    # so different calls degrade the page in different ways.
    if params.get('enable_advanced_effects', True):
        if random.random() < params.get('fold_probability', 0.4):
            transforms.append(("fold_crease", lambda img: Image.fromarray(
                AdvancedImageEffects.simulate_fold_crease(np.array(img), generate_random_fold_lines(img.size), params.get("fold_intensity", 0.3))
            )))
        if random.random() < params.get('advanced_effect_probability', 0.7):
            transforms.append(("ink_bleed", lambda img: Image.fromarray(
                AdvancedImageEffects.simulate_ink_bleed(np.array(img), params.get("bleed_intensity", 0.3), params.get("bleed_radius", 3))
            )))
        if random.random() < params.get('perspective_probability', 0.5):
            transforms.append(("perspective", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_perspective_distortion(np.array(img), params.get("corner_displacement", 20))
            )))
        if random.random() < params.get('shadow_probability', 0.6):
            transforms.append(("shadow_cast", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_shadow_effects(np.array(img), params.get("shadow_angle", 45), params.get("shadow_intensity", 0.4))
            )))
        if random.random() < params.get('advanced_effect_probability', 0.7):
            transforms.append(("morphological", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_morphological_operations(np.array(img), params.get("morph_operation", "mixed"), params.get("morph_kernel_size", 3))
            )))
        if params.get('scanner_artifacts', True) and random.random() < 0.3:
            transforms.append(("scanner_artifacts", lambda img: Image.fromarray(
                AdvancedImageEffects.simulate_scanner_artifacts(np.array(img), params.get("compression_quality", 85))
            )))
        if random.random() < 0.3:
            transforms.append(("lens_distortion", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_lens_distortion(np.array(img), params.get("lens_distortion_strength", 0.2))
            )))

    # Either exercise the full pool or a random sample of at most five.
    if params["all_transforms"]:
        selected_transforms = transforms
    else:
        n_transforms = random.randint(1, min(5, len(transforms)))
        selected_transforms = random.sample(transforms, n_transforms)

    # Apply each selected transform independently to the ORIGINAL image.
    for transform_name, transform_func in selected_transforms:
        try:
            transformed_img = safe_apply_effect(transform_func, original_image, transform_name)
            if output_dir:
                transformed_filename = f"{base_filename}_{transform_name}.png"
                transformed_path = os.path.join(output_dir, transformed_filename)
                transformed_img.save(transformed_path)
                logger.info(f"Saved transformed image to {transformed_path}")
            all_images.append(transformed_img)
        except Exception as e:
            logger.error(f"Error applying transform {transform_name}: {e}")

    # Additionally chain all selected transforms into one combined image.
    if len(selected_transforms) > 1:
        try:
            combined_img = original_image.copy()
            for _, transform_func in selected_transforms:
                combined_img = safe_apply_effect(transform_func, combined_img, "combined")
            if output_dir:
                combined_path = os.path.join(output_dir, f"{base_filename}_combined.png")
                combined_img.save(combined_path)
                logger.info(f"Saved combined transformation to {combined_path}")
            all_images.append(combined_img)
        except Exception as e:
            logger.error(f"Error creating combined transformation: {e}")

    return all_images
161
+
162
+
163
def create_comprehensive_effect_combinations():
    """Enumerate the effect combinations used for systematic augmentation.

    Returns a list of effect-name lists covering: every effect alone, all
    advanced-effect pairs and triples, every geometric/advanced pairing,
    and three larger advanced groupings (first four, last three, all seven).
    """
    base_effects = ["rotate", "brightness", "contrast", "noise", "blur"]
    geometric_effects = ["washboard", "cylinder"]
    advanced_effects = [
        "fold_crease",
        "ink_bleed",
        "perspective",
        "shadow_cast",
        "morphological",
        "scanner_artifacts",
        "lens_distortion",
    ]
    combos = [[name] for name in base_effects + geometric_effects + advanced_effects]
    combos.extend(list(pair) for pair in itertools.combinations(advanced_effects, 2))
    combos.extend([geo, adv] for geo in geometric_effects for adv in advanced_effects)
    combos.extend(list(triple) for triple in itertools.combinations(advanced_effects, 3))
    combos.append(advanced_effects[:4])
    combos.append(advanced_effects[4:])
    combos.append(advanced_effects)
    return combos
189
+
190
+
191
def apply_systematic_postprocessing(original_image: Image.Image, output_dir: str, base_filename: str, params: Dict, effect_combination: List[str] = None) -> List[Image.Image]:
    """Apply baseline jitter plus a fixed, named *effect_combination* in order.

    Unlike ``apply_enhanced_postprocessing`` this variant is deterministic in
    WHICH effects run: the five baseline effects always run first (rotate,
    brightness, contrast, noise, blur), then each named effect from
    *effect_combination* is chained onto the result. Effect parameters are
    still sampled randomly per call. The result is saved to *output_dir*
    when given, named after the joined combination.

    Returns:
        ``[original_image, final_image]`` — the untouched input and the
        fully chained result.
    """
    all_images = [original_image]

    def rotate_image(img, angle):
        # Fill corners exposed by rotation with the image's mean colour.
        bg_color = tuple(np.array(img).mean(axis=(0, 1)).astype(int))
        return img.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=bg_color)

    def adjust_brightness(img, factor):
        enhancer = ImageEnhance.Brightness(img)
        return enhancer.enhance(factor)

    def adjust_contrast(img, factor):
        enhancer = ImageEnhance.Contrast(img)
        return enhancer.enhance(factor)

    def add_noise(img, intensity):
        # Additive Gaussian noise, clipped back into valid 8-bit range.
        img_array = np.array(img).astype(np.float32)
        noise = np.random.normal(0, intensity * 255, img_array.shape)
        noisy_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_array)

    def blur_image(img, radius):
        return img.filter(ImageFilter.GaussianBlur(radius=radius))

    # Name -> callable dispatch table; parameters are sampled at call time.
    transforms = {
        "rotate": lambda img: rotate_image(img, random.uniform(-params["rotation_max"], params["rotation_max"])),
        "brightness": lambda img: adjust_brightness(img, random.uniform(1.0-params["brightness_var"], 1.0+params["brightness_var"])),
        "contrast": lambda img: adjust_contrast(img, random.uniform(1.0-params["contrast_var"], 1.0+params["contrast_var"])),
        "noise": lambda img: add_noise(img, random.uniform(params["noise_min"], params["noise_max"])),
        "blur": lambda img: blur_image(img, random.uniform(params["blur_min"], params["blur_max"])),
        "washboard": lambda img: washboard_warp(img, amplitude=random.uniform(6, 12), wavelength=random.uniform(90, 150), phase=random.uniform(0, 2*pi), decay_from_top=random.choice([True, False])),
        "cylinder": lambda img: cylindrical_edge_warp(img, side=random.choice(["left", "right"]), strength=random.uniform(0.4, 0.8) * random.choice([1, -1]), warp_portion=random.uniform(0.35, 0.5)),
        "fold_crease": lambda img: Image.fromarray(AdvancedImageEffects.simulate_fold_crease(np.array(img), generate_random_fold_lines(img.size), params.get("fold_intensity", 0.3))),
        "ink_bleed": lambda img: Image.fromarray(AdvancedImageEffects.simulate_ink_bleed(np.array(img), params.get("bleed_intensity", 0.3), params.get("bleed_radius", 3))),
        "perspective": lambda img: Image.fromarray(AdvancedImageEffects.apply_perspective_distortion(np.array(img), params.get("corner_displacement", 20))),
        "shadow_cast": lambda img: Image.fromarray(AdvancedImageEffects.apply_shadow_effects(np.array(img), params.get("shadow_angle", 45), params.get("shadow_intensity", 0.4))),
        "morphological": lambda img: Image.fromarray(AdvancedImageEffects.apply_morphological_operations(np.array(img), params.get("morph_operation", "mixed"), params.get("morph_kernel_size", 3))),
        "scanner_artifacts": lambda img: Image.fromarray(AdvancedImageEffects.simulate_scanner_artifacts(np.array(img), params.get("compression_quality", 85))),
        "lens_distortion": lambda img: Image.fromarray(AdvancedImageEffects.apply_lens_distortion(np.array(img), params.get("lens_distortion_strength", 0.2))),
    }

    # Baseline effects always run, chained in this fixed order.
    current_image = original_image
    for effect_name in ["rotate", "brightness", "contrast", "noise", "blur"]:
        current_image = safe_apply_effect(transforms[effect_name], current_image, effect_name)

    # Then chain the requested named effects; unknown names are silently skipped.
    if effect_combination:
        for effect_name in effect_combination:
            if effect_name in transforms:
                current_image = safe_apply_effect(transforms[effect_name], current_image, effect_name)
        if output_dir:
            combo_name = "_".join(effect_combination)
            filename = f"{base_filename}_{combo_name}.png"
            filepath = os.path.join(output_dir, filename)
            current_image.save(filepath)
            logger.info(f"Saved combination image: {filepath}")

    all_images.append(current_image)
    return all_images
249
+
backend/requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ pydantic
5
+ google-api-core>=2.0.0
6
+ google-generativeai
7
+
8
+ # Image/text generation stack
9
+ opencv-python
10
+ pillow
11
+ numpy
12
+ scipy
13
+ noise
14
+ pandas
15
+ requests
16
+ matplotlib
17
+ datasets
content/static/NotoSansOriya-Black (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33fbf1d96373a315468ba4087645cac7fbf3b7f0da9cc5a7fb8d6bbc79f7e3
3
+ size 142472
content/static/NotoSansOriya-Black.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33fbf1d96373a315468ba4087645cac7fbf3b7f0da9cc5a7fb8d6bbc79f7e3
3
+ size 142472
content/static/NotoSansOriya-Bold (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904cd4edafaa595e5ba0e76094503b5f9a07967360c32013de63b7b9318e45f
3
+ size 155216
content/static/NotoSansOriya-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904cd4edafaa595e5ba0e76094503b5f9a07967360c32013de63b7b9318e45f
3
+ size 155216
content/static/NotoSansOriya-ExtraBold (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1f187e6c30d6931ffbee9b144b1396568d68273542b24f80844bfa12a408d4
3
+ size 142676
content/static/NotoSansOriya-ExtraBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1f187e6c30d6931ffbee9b144b1396568d68273542b24f80844bfa12a408d4
3
+ size 142676
content/static/NotoSansOriya-ExtraLight (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82202255f5b0ceee121dd00ed9fdccf03066283e5774a2e256fdb21c88292bc
3
+ size 155120
content/static/NotoSansOriya-ExtraLight.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82202255f5b0ceee121dd00ed9fdccf03066283e5774a2e256fdb21c88292bc
3
+ size 155120
content/static/NotoSansOriya-Light (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffbf720886730b128dc4d0fb333d7741b22ca36a56d989cdfc7e90002249f88d
3
+ size 155164
content/static/NotoSansOriya-Light.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffbf720886730b128dc4d0fb333d7741b22ca36a56d989cdfc7e90002249f88d
3
+ size 155164
content/static/NotoSansOriya-Medium (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0457c076cdb02f963737648a248ff74c3f3ac29aca2d2e5c3266b9d163fb22
3
+ size 155148
content/static/NotoSansOriya-Medium.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0457c076cdb02f963737648a248ff74c3f3ac29aca2d2e5c3266b9d163fb22
3
+ size 155148
content/static/NotoSansOriya-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5d16377ee01703170468402ad02d15595c77150bf62b4c31c66d3e79ad58039
3
+ size 154960
content/static/NotoSansOriya-SemiBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e86666b102c911434003af8b93442f2136f19fceab56ba940473a76c28d6801
3
+ size 155176
content/static/NotoSansOriya-Thin.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0fc04ba3330cca8d95d644a78829ad32195488df574aae4b18e09743023409
3
+ size 154372
content/static/NotoSansOriya_Condensed-Black.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2f25042a7500ecbb04b5b4672a7e6549b3d837a44fe9799012b82f1740709a
3
+ size 142696
content/static/NotoSansOriya_Condensed-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9d953ba9a0055476c14a688073ed4fd91ebfd0bdaad9c83ff2392a30463d17
3
+ size 155080
content/static/NotoSansOriya_Condensed-ExtraBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cd61a8ae71154ee5076fd872854a60e2468c9777fe4b62b391ebd080ff9931
3
+ size 142852
content/static/NotoSansOriya_Condensed-ExtraLight.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8275fdea5c4a89380e5139d3cfafa43cc6f9f38d98cc4dc19a0386382f3ec65f
3
+ size 154960
content/static/NotoSansOriya_Condensed-Light.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6e58df83c60b8aaff9652f8c1e728f6db5fd559d0c4ef3ba940d32c097ed32
3
+ size 154964
content/static/NotoSansOriya_Condensed-Medium.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f8e25c863a076f16b414027d92a36b3c8ae879421548013e8bfdc214671bc1b
3
+ size 155092
content/static/NotoSansOriya_Condensed-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac6263cb259109278a7a4f94ed5dffa6f5b68755304fe1812912eff7d31dc332
3
+ size 154976
content/static/NotoSansOriya_Condensed-SemiBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e563557bc6c181cf446eaa1ddf40ec3e7f06a22085a8d1833cf6e1499c7ef9
3
+ size 155152
content/static/NotoSansOriya_Condensed-Thin.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82faf95ee90bd0b5cc56faba1fa888ae30b9984983b00c77b6c0157ba1df35d5
3
+ size 154340
content/static/NotoSansOriya_ExtraCondensed-Black.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab791dee9aeb1cd0bb66d98f3387e51ef2c279863c9af207a6048f34fa433e9
3
+ size 142288
content/static/NotoSansOriya_ExtraCondensed-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8ec663c3c038aa2e0e8f9c4cb6530d8b390d6610bb47f8064abdc8c0f00ebe
3
+ size 154828
content/static/NotoSansOriya_ExtraCondensed-ExtraBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536dcb3da1f946d0ba445327b4f3dc91762ebdfa71dd3edfe8fca8fba60171d7
3
+ size 142396