Shahid committed on
Commit
82e5025
·
0 Parent(s):

Initial clean history with LFS + unified app

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +17 -0
  2. .gitattributes +9 -0
  3. .gitignore +208 -0
  4. Dockerfile +49 -0
  5. README.md +159 -0
  6. backend/app/__init__.py +2 -0
  7. backend/app/api/__init__.py +2 -0
  8. backend/app/api/routers/__init__.py +2 -0
  9. backend/app/api/routers/ocr.py +134 -0
  10. backend/app/api/routers/synthetic.py +122 -0
  11. backend/app/data/annotations/annotations.csv +1 -0
  12. backend/app/main.py +53 -0
  13. backend/app/services/annotations.py +75 -0
  14. backend/app/services/ocr_processor.py +102 -0
  15. backend/app/services/synthetic/__init__.py +22 -0
  16. backend/app/services/synthetic/backgrounds.py +129 -0
  17. backend/app/services/synthetic/config.py +61 -0
  18. backend/app/services/synthetic/core.py +230 -0
  19. backend/app/services/synthetic/effects.py +218 -0
  20. backend/app/services/synthetic/huggingface_processor.py +228 -0
  21. backend/app/services/synthetic/text_renderer.py +112 -0
  22. backend/app/services/synthetic/transformations.py +249 -0
  23. backend/requirements.txt +17 -0
  24. content/static/NotoSansOriya-Black (2).ttf +3 -0
  25. content/static/NotoSansOriya-Black.ttf +3 -0
  26. content/static/NotoSansOriya-Bold (2).ttf +3 -0
  27. content/static/NotoSansOriya-Bold.ttf +3 -0
  28. content/static/NotoSansOriya-ExtraBold (2).ttf +3 -0
  29. content/static/NotoSansOriya-ExtraBold.ttf +3 -0
  30. content/static/NotoSansOriya-ExtraLight (2).ttf +3 -0
  31. content/static/NotoSansOriya-ExtraLight.ttf +3 -0
  32. content/static/NotoSansOriya-Light (2).ttf +3 -0
  33. content/static/NotoSansOriya-Light.ttf +3 -0
  34. content/static/NotoSansOriya-Medium (2).ttf +3 -0
  35. content/static/NotoSansOriya-Medium.ttf +3 -0
  36. content/static/NotoSansOriya-Regular.ttf +3 -0
  37. content/static/NotoSansOriya-SemiBold.ttf +3 -0
  38. content/static/NotoSansOriya-Thin.ttf +3 -0
  39. content/static/NotoSansOriya_Condensed-Black.ttf +3 -0
  40. content/static/NotoSansOriya_Condensed-Bold.ttf +3 -0
  41. content/static/NotoSansOriya_Condensed-ExtraBold.ttf +3 -0
  42. content/static/NotoSansOriya_Condensed-ExtraLight.ttf +3 -0
  43. content/static/NotoSansOriya_Condensed-Light.ttf +3 -0
  44. content/static/NotoSansOriya_Condensed-Medium.ttf +3 -0
  45. content/static/NotoSansOriya_Condensed-Regular.ttf +3 -0
  46. content/static/NotoSansOriya_Condensed-SemiBold.ttf +3 -0
  47. content/static/NotoSansOriya_Condensed-Thin.ttf +3 -0
  48. content/static/NotoSansOriya_ExtraCondensed-Black.ttf +3 -0
  49. content/static/NotoSansOriya_ExtraCondensed-Bold.ttf +3 -0
  50. content/static/NotoSansOriya_ExtraCondensed-ExtraBold.ttf +3 -0
.dockerignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ **/__pycache__
3
+ **/*.pyc
4
+ node_modules
5
+ frontend/node_modules
6
+ frontend/.vite
7
+ frontend/dist
8
+ .DS_Store
9
+ .vscode
10
+ .idea
11
+ **/.pytest_cache
12
+ **/.mypy_cache
13
+ **/.ruff_cache
14
+ **/.cache
15
+
16
+ # Data should come from volume in Spaces
17
+ backend/data
.gitattributes ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ *.ttf filter=lfs diff=lfs merge=lfs -text
2
+ *.png filter=lfs diff=lfs merge=lfs -text
3
+ *.jpg filter=lfs diff=lfs merge=lfs -text
4
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
5
+ *.svg filter=lfs diff=lfs merge=lfs -text
6
+ .ttf filter=lfs diff=lfs merge=lfs -text
7
+ .png filter=lfs diff=lfs merge=lfs -text
8
+ .jpg filter=lfs diff=lfs merge=lfs -text
9
+ .jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+ backend/app/data/*
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multi-stage build for Hugging Face Spaces (Docker) with single URL serving
2
+
3
+ # --- Frontend build stage ---
4
+ FROM node:20-alpine AS frontend-build
5
+ WORKDIR /app
6
+ COPY frontend/package*.json ./
7
+ RUN npm ci
8
+ COPY frontend ./
9
+ RUN npm run build
10
+
11
+ # --- Backend stage ---
12
+ FROM python:3.11-slim AS runtime
13
+
14
+ # System deps for OpenCV and general libs
15
+ RUN apt-get update && apt-get install -y --no-install-recommends \
16
+ libgl1 \
17
+ libglib2.0-0 \
18
+ libsm6 \
19
+ libxext6 \
20
+ libxrender1 \
21
+ build-essential \
22
+ python3-dev \
23
+ git \
24
+ curl \
25
+ && rm -rf /var/lib/apt/lists/*
26
+
27
+ WORKDIR /app
28
+
29
+ # Install backend deps
30
+ COPY backend/requirements.txt /app/backend/requirements.txt
31
+ RUN pip install --upgrade pip && \
32
+ pip install --no-cache-dir -r /app/backend/requirements.txt
33
+
34
+ # Copy application code
35
+ COPY backend /app/backend
36
+ COPY content /app/content
37
+
38
+ # Copy built frontend
39
+ COPY --from=frontend-build /app/dist /app/frontend_dist
40
+
41
+ # Environment
42
+ ENV PORT=7860 \
43
+ DATA_DIR=/data \
44
+ FRONTEND_DIST=/app/frontend_dist
45
+
46
+ EXPOSE 7860
47
+
48
+ # Run FastAPI (serve API + static frontend)
49
+ CMD ["python", "-m", "uvicorn", "app.main:app", "--app-dir", "backend", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Odia OCR Annotation + Synthetic Text Generator
2
+
3
+ A unified repository that provides:
4
+ - An OCR annotation tool (React frontend + FastAPI backend) to upload images, run OCR via Gemini, edit validated text, and export CSVs.
5
+ - A synthetic text generator (exposed via backend API) to render Odia/Sanskrit-like text with realistic paper/effects, including HuggingFace dataset processing.
6
+
7
+ ## Repository Structure
8
+
9
+ - `backend/`
10
+ - `app/main.py`: FastAPI app with two routers: `/api/ocr` and `/api/synthetic`
11
+ - `app/api/routers/ocr.py`: OCR endpoints (upload, OCR, annotations import/export)
12
+ - `app/api/routers/synthetic.py`: Synthetic generation endpoints
13
+ - `app/services/`: Shared services
14
+ - `ocr_processor.py`: Gemini OCR
15
+ - `annotations.py`: CSV/JSON I/O
16
+ - `synthetic/`: generator modules (config, core, effects, backgrounds, text_renderer, transformations, huggingface_processor)
17
+ - `data/`: runtime storage
18
+ - `uploaded_images/`: uploaded images (served at `/images`)
19
+ - `annotations/`: `annotations.csv` and JSON
20
+ - `synth_outputs/`: generated images and CSVs (served at `/static/synthetic`)
21
+ - `requirements.txt`: backend dependencies
22
+ - `frontend/`
23
+ - Vite + React + Tailwind app
24
+ - Routes: `/ocr` (annotation UI) and `/synthetic` (generator UI)
25
+ - `content/static/`: NotoSans Oriya fonts used by generator
26
+
27
+ ## Run Locally
28
+
29
+ 1) Backend
30
+ - `pip install -r backend/requirements.txt`
31
+ - From `backend/`: `uvicorn app.main:app --reload`
32
+ - Static mounts:
33
+ - `/images` → `backend/data/uploaded_images`
34
+ - `/static/synthetic` → `backend/data/synth_outputs`
35
+
36
+ 2) Frontend
37
+ - `cd frontend && npm install && npm run dev`
38
+ - Open `http://localhost:5173`
39
+ - Use navigation to switch between OCR and Synthetic pages
40
+
41
+ ## OCR API (FastAPI)
42
+
43
+ - `POST /api/ocr/upload`:
44
+ - Multipart files field: `files`
45
+ - Stores images in `backend/data/uploaded_images`
46
+ - `POST /api/ocr/process`:
47
+ - JSON: `{ "api_key": "<GEMINI_KEY>", "image_filenames": ["img1.png", ...] }`
48
+ - Returns: `{ "img1.png": "extracted text", ... }`
49
+ - `GET /api/ocr/annotations`:
50
+ - Returns current annotations, valid/missing images
51
+ - `POST /api/ocr/save`:
52
+ - JSON: `{ "<filename>": { "extracted_text": "...", "validated_text": "..." } }`
53
+ - Saves to CSV and JSON in `backend/data/annotations`
54
+ - `POST /api/ocr/import`:
55
+ - Multipart: `file` (CSV), `image_folder` (e.g., `uploaded_images`)
56
+ - Validates and returns annotations + image presence
57
+ - `POST /api/ocr/export`:
58
+ - JSON: `{ annotations: {...}, validated_texts: {...} }`
59
+ - Returns a downloadable CSV
60
+
61
+ Note: Legacy endpoints (`/upload/`, `/process-ocr/`, etc.) have been removed after the frontend migration. Use `/api/ocr/...` going forward.
62
+
63
+ ## Synthetic API (FastAPI)
64
+
65
+ - `POST /api/synthetic/generate`
66
+ - Modes: `single`, `comprehensive`, `ultra-realistic`, `huggingface`
67
+ - Request body examples:
68
+ - Non-HF:
69
+ `{ "mode": "single", "text": "some Odia text", "output_subdir": "demo_run_01" }`
70
+ - HF CSV:
71
+ `{ "mode": "huggingface", "dataset_url": "https://.../data.csv", "text_column": "text", "max_samples": 100, "output_subdir": "hf_demo" }`
72
+ - Response:
73
+ - Non-HF: `{ "status": "ok", "output_dir": "/static/synthetic/<job_id>" }`
74
+ - HF: `{ "status": "ok", "output_dir": "/static/synthetic/<job_id>", "csv": "/static/synthetic/<job_id>/dataset.csv", "images_dir": "/static/synthetic/<job_id>/images" }`
75
+ - Outputs are stored under `backend/data/synth_outputs/<job_id>/` and publicly served at `/static/synthetic/<job_id>/...`.
76
+
77
+ ## Fonts
78
+
79
+ - Generator uses fonts from `content/static/`.
80
+ - Default: `NotoSansOriya_Condensed-Regular.ttf` (configurable). Ensure the directory exists.
81
+
82
+ ## Effects & Styles
83
+
84
+ - Paper styles: lined paper, old paper, birch, parchment
85
+ - Effects: rotation, brightness/contrast/noise/blur, fold/crease, ink bleed, perspective, shadows, morphological ops, scanner artifacts, lens distortion, washboard/cylinder warps
86
+
87
+ ## Notes
88
+
89
+ - The backend expects the Gemini API key to be provided per-request to `/api/ocr/process`. Do not hardcode keys server-side.
90
+ - For HuggingFace datasets, the backend uses `datasets` when possible, or downloads raw CSV URLs.
91
+ - You can browse generated outputs via the links returned by `/api/synthetic/generate`.
92
+
93
+ ## Deploy to Hugging Face Spaces (Docker)
94
+
95
+ This repo includes a multi-stage Dockerfile to deploy both backend and the built frontend as a single Space.
96
+
97
+ Steps:
98
+ - Create a new Space → Type: Docker
99
+ - Push this repository to the Space
100
+ - In Space Settings:
101
+ - Enable Persistent Storage
102
+ - (Optional) Add Secrets/Env Vars as needed, e.g., `DATA_DIR=/data` (default already) and `FRONTEND_DIST=/app/frontend_dist`
103
+ - The container exposes port `7860` by default.
104
+
105
+ What the image does:
106
+ - Builds the frontend (`frontend/`) and copies the `dist/` to `/app/frontend_dist`
107
+ - Installs backend dependencies and runs `uvicorn app.main:app` from `backend/`
108
+ - Serves:
109
+ - API at `/api/...`
110
+ - Uploaded images at `/images`
111
+ - Synthetic outputs at `/static/synthetic`
112
+ - Frontend SPA at `/` (served from `/app/frontend_dist`)
113
+
114
+
115
+ 1. **Paper Textures**: Realistic paper fiber patterns using Perlin noise
116
+ 2. **Aging Effects**: Edge darkening and aging patterns
117
+ 3. **Physical Damage**: Fold lines, creases, and ink bleeding
118
+ 4. **Scanner Artifacts**: Dust, compression artifacts, scanning lines
119
+ 5. **Geometric Distortions**: Perspective changes, cylindrical warping
120
+ 6. **Lighting Effects**: Shadows and lens distortions
121
+
122
+ ## Font Requirements
123
+
124
+ The generator requires appropriate fonts for text rendering. Default configuration expects:
125
+ - Font directory: `/content/static/`
126
+ - Font file: `NotoSansOriya_ExtraCondensed-Regular.ttf`
127
+
128
+ You can specify custom fonts using `--font-dir` and `--font` parameters.
129
+
130
+ ## Performance Tips
131
+
132
+ - Use `--max-samples` to limit processing for large datasets
133
+ - Disable advanced effects with `--no-advanced-effects` for faster generation
134
+ - Use multiprocessing with `--use-multiprocessing` for batch jobs
135
+ - Adjust image dimensions to balance quality and speed
136
+
137
+ ## Error Handling
138
+
139
+ The package includes comprehensive error handling:
140
+ - Graceful fallbacks for missing dependencies
141
+ - Detailed logging for debugging
142
+ - Validation of input parameters
143
+ - Safe handling of malformed datasets
144
+
145
+ ## Contributing
146
+
147
+ The modular structure makes it easy to extend:
148
+ - Add new effects in `effects.py`
149
+ - Implement new background styles in `backgrounds.py`
150
+ - Create custom transformations in `transformations.py`
151
+ - Extend dataset processing in `huggingface_processor.py`
152
+
153
+ ## License
154
+
155
+ [Add your license information here]
156
+
157
+ ---
158
+
159
+ **Note**: This is a complete rewrite of the original monolithic code into a modular, extensible package with added HuggingFace dataset processing capabilities.
backend/app/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Makes 'app' a package
2
+
backend/app/api/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # api package
2
+
backend/app/api/routers/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # routers package
2
+
backend/app/api/routers/ocr.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ from typing import List, Dict
4
+ import os
5
+ import shutil
6
+
7
+ from ...services.annotations import (
8
+ load_annotations_from_csv,
9
+ save_annotations_to_csv,
10
+ save_annotations,
11
+ )
12
+ from ...services.ocr_processor import batch_run_ocr
13
+
14
+
15
router = APIRouter(prefix="/api/ocr", tags=["ocr"])


# File extensions accepted by the upload endpoint (lower-cased comparison).
SUPPORTED_IMAGE_TYPES = {"jpg", "jpeg", "png", "bmp", "webp", "tiff"}


# BASE_DIR resolves to backend/app: three dirname() hops up from
# backend/app/api/routers/ocr.py (routers -> api -> app).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
# DATA_DIR can be overridden by environment (e.g. DATA_DIR=/data in Docker/Spaces).
DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
UPLOAD_DIR = os.path.join(DATA_DIR, "uploaded_images")
ANNOTATIONS_DIR = os.path.join(DATA_DIR, "annotations")
ANNOTATION_CSV_PATH = os.path.join(ANNOTATIONS_DIR, "annotations.csv")

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(ANNOTATIONS_DIR, exist_ok=True)

# Ensure CSV exists so GET /annotations works before any save.
# utf-8-sig writes a BOM so the file opens with correct encoding in Excel.
if not os.path.exists(ANNOTATION_CSV_PATH):
    with open(ANNOTATION_CSV_PATH, 'w', encoding='utf-8-sig') as f:
        f.write('image_filename,extracted_text,validated_text\n')
35
+
36
+
37
@router.post("/upload")
async def upload_images(files: List[UploadFile] = File(...)):
    """Store uploaded images under UPLOAD_DIR.

    Files with unsupported extensions are silently skipped (the response
    lists only the filenames that were stored). Returns
    ``{"status": "success", "images": [<stored basenames>]}``.
    """
    image_names: List[str] = []
    for file in files:
        # UploadFile.filename may be None for raw multipart parts; the
        # original code crashed with AttributeError (HTTP 500) on those.
        if not file.filename:
            continue
        ext = file.filename.rsplit('.', 1)[-1].lower()
        if ext not in SUPPORTED_IMAGE_TYPES:
            continue

        # basename() strips any client-supplied directory components,
        # preventing path traversal via crafted filenames.
        safe_name = os.path.basename(file.filename)
        path = os.path.join(UPLOAD_DIR, safe_name)
        with open(path, "wb") as f:
            f.write(await file.read())
        image_names.append(safe_name)
    return {"status": "success", "images": image_names}
51
+
52
+
53
@router.post("/process")
def process_ocr(request: Dict[str, object]):
    """Run Gemini OCR over previously uploaded images.

    Body: ``{"api_key": "<GEMINI_KEY>", "image_filenames": ["img1.png", ...]}``
    Returns a mapping of filename -> extracted text.

    Raises HTTP 400 when the key is missing or the filename list is
    absent/empty/not a list.
    """
    # `or ""` guards against a JSON null: str(None) is the truthy string
    # "None", which previously slipped past the emptiness check below.
    api_key = str(request.get("api_key", "") or "")
    raw_filenames = request.get("image_filenames", [])
    # Reject non-list payloads explicitly: list("abc") would silently
    # explode a string into single characters.
    if not isinstance(raw_filenames, list):
        raise HTTPException(status_code=400, detail="image_filenames must be a list")
    image_filenames = [str(name) for name in raw_filenames]
    if not api_key:
        raise HTTPException(status_code=400, detail="api_key is required")
    if not image_filenames:
        raise HTTPException(status_code=400, detail="image_filenames is required")
    results = batch_run_ocr(image_filenames, UPLOAD_DIR, api_key)
    return results
63
+
64
+
65
@router.get("/annotations")
def get_annotations():
    """Return the saved annotation set plus which referenced images exist on disk."""
    try:
        annotations, valid_images, missing_images = load_annotations_from_csv(
            ANNOTATION_CSV_PATH, UPLOAD_DIR
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {
        "annotations": annotations,
        "valid_images": valid_images,
        "missing_images": missing_images,
    }
76
+
77
+
78
@router.post("/save")
def save_annotated(data: dict):
    """Persist the posted annotation mapping to CSV and JSON on disk."""
    try:
        save_annotations(ANNOTATION_CSV_PATH, data)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"status": "saved"}
85
+
86
+
87
@router.post("/import")
async def import_csv(file: UploadFile = File(...), image_folder: str = Form("uploaded_images")):
    """Import an annotations CSV and report which referenced images are present.

    ``image_folder`` may be absolute, or relative to DATA_DIR.
    The uploaded CSV is parsed from a temp file and removed afterwards.
    """
    temp_dir = os.path.join(DATA_DIR, "temp")
    os.makedirs(temp_dir, exist_ok=True)
    # filename may be None for raw multipart parts; fall back to a fixed name.
    temp_path = os.path.join(temp_dir, os.path.basename(file.filename or "import.csv"))

    with open(temp_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    try:
        # If relative folder, resolve within DATA_DIR
        folder = image_folder
        if not os.path.isabs(folder):
            folder = os.path.join(DATA_DIR, folder)
        annotations, valid_images, missing_images = load_annotations_from_csv(temp_path, folder)
        return {
            "annotations": annotations,
            "valid_images": valid_images,
            "missing_images": missing_images
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # The temp CSV is only needed for parsing; previously it was never
        # deleted and accumulated under DATA_DIR/temp.
        try:
            os.remove(temp_path)
        except OSError:
            pass
109
+
110
+
111
@router.post("/export")
async def export_csv(request: Dict[str, dict]):
    """Merge extracted and validated texts, persist them to CSV, and return the file.

    Body: ``{ annotations: {...}, validated_texts: {...} }``.
    """
    try:
        annotations = request.get("annotations", {})
        validated_texts = request.get("validated_texts", {})

        # Pair each image's extracted text with its validation (empty if none).
        combined_data: Dict[str, Dict[str, str]] = {
            image_name: {
                "extracted_text": extracted,
                "validated_text": validated_texts.get(image_name, ""),
            }
            for image_name, extracted in annotations.items()
        }

        save_annotations_to_csv(ANNOTATION_CSV_PATH, combined_data)
        return FileResponse(
            ANNOTATION_CSV_PATH,
            media_type='text/csv',
            filename='annotations.csv'
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
132
+
133
+
134
+ # Legacy routes removed after frontend migration to /api/ocr
backend/app/api/routers/synthetic.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from typing import Dict, Optional
3
+ import os
4
+ import uuid
5
+
6
+ from ...services.synthetic.config import ENHANCED_DEFAULT_PARAMS
7
+ from ...services.synthetic.core import (
8
+ generate_enhanced_sanskrit_samples,
9
+ generate_comprehensive_dataset,
10
+ generate_ultra_realistic_samples,
11
+ )
12
+ from ...services.synthetic.huggingface_processor import HuggingFaceDatasetProcessor
13
+
14
+
15
+ router = APIRouter(prefix="/api/synthetic", tags=["synthetic"])
16
+
17
+
18
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19
+ DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
20
+ DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
21
+ SYN_OUT_DIR = os.path.join(DATA_DIR, "synth_outputs")
22
+ FONTS_DIR = os.path.abspath(os.path.join(BASE_DIR, os.pardir, os.pardir, "content", "static"))
23
+
24
+ os.makedirs(SYN_OUT_DIR, exist_ok=True)
25
+
26
+
27
def normalized_params(incoming: Optional[Dict]) -> Dict:
    """Merge caller-supplied params onto the defaults and pin the font directory.

    Hyphenated keys (e.g. "max-samples") are normalized to underscores so the
    generator modules see a single key style.
    """
    merged = dict(ENHANCED_DEFAULT_PARAMS)
    if incoming:
        for key, value in incoming.items():
            merged[key.replace('-', '_')] = value
    # Always resolve fonts from the repo's bundled font directory, regardless
    # of what the client sent.
    merged['font_dir'] = FONTS_DIR
    return merged
36
+
37
+
38
@router.post("/generate")
def generate(request: Dict[str, object]):
    """
    Universal generation endpoint.
    Body:
      {
        mode: 'single' | 'comprehensive' | 'ultra-realistic' | 'huggingface',
        text?: string (for non-HF modes),
        output_subdir?: string,
        params?: object,
        text_column?: string (HF),
        max_samples?: int (HF),
        dataset_url?: string (HF) OR csv_file?: string (server-side path)
      }
    Returns paths relative to /static/synthetic when applicable.
    Raises HTTP 400 for unknown modes or missing HF inputs, HTTP 500 on
    generation failures.
    """
    mode = str(request.get("mode", "single"))
    # Fall back to a default Odia sample sentence when no text is supplied.
    text = str(request.get("text", "")).strip() or "କବି ସମ୍ରାଟ ଉପେନ୍ଦ୍ର ଭଞ୍ଜ ..."
    output_subdir = str(request.get("output_subdir", ""))
    params = normalized_params(request.get("params"))

    # Resolve output dir under synth_outputs; a random UUID job id is used
    # when the caller does not name the run.
    job_id = output_subdir or str(uuid.uuid4())
    out_dir = os.path.join(SYN_OUT_DIR, job_id)
    os.makedirs(out_dir, exist_ok=True)

    try:
        if mode == 'single':
            generate_enhanced_sanskrit_samples(
                text=text,
                font_path=os.path.join(params['font_dir'], params['font']),
                output_dir=out_dir,
                params=params,
            )
            return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}"}

        elif mode == 'comprehensive':
            generate_comprehensive_dataset(
                text=text,
                output_dir=out_dir,
                params=params,
            )
            return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}"}

        elif mode == 'ultra-realistic':
            generate_ultra_realistic_samples(
                text=text,
                output_dir=out_dir,
                style_focus=request.get("style_focus"),
                params=params,
            )
            return {"status": "ok", "output_dir": f"/static/synthetic/{job_id}"}

        elif mode == 'huggingface':
            text_column = str(request.get("text_column", "text"))
            max_samples = request.get("max_samples")
            dataset_url = request.get("dataset_url")
            csv_file = request.get("csv_file")

            processor = HuggingFaceDatasetProcessor(output_dir=out_dir, params=params)

            # A server-side csv_file takes precedence over a remote dataset_url.
            if csv_file:
                ok = processor.process_local_csv(csv_path=csv_file, text_column=text_column, max_samples=max_samples)
            elif dataset_url:
                ok = processor.process_huggingface_dataset(dataset_identifier=dataset_url, text_column=text_column, max_samples=max_samples)
            else:
                raise HTTPException(status_code=400, detail="Provide dataset_url or csv_file for huggingface mode")

            if not ok:
                raise HTTPException(status_code=500, detail="HuggingFace processing failed")

            return {
                "status": "ok",
                "output_dir": f"/static/synthetic/{job_id}",
                "csv": f"/static/synthetic/{job_id}/dataset.csv",
                "images_dir": f"/static/synthetic/{job_id}/images"
            }

        else:
            raise HTTPException(status_code=400, detail=f"Unknown mode: {mode}")

    # Re-raise our own HTTP errors untouched; wrap anything else as a 500.
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
backend/app/data/annotations/annotations.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ image_filename,extracted_text,validated_text
backend/app/main.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os

from .api.routers.ocr import router as ocr_router
from .api.routers.synthetic import router as synthetic_router


app = FastAPI(title="Unified Backend: OCR + Synthetic")

# CORS (dev-friendly; tighten for prod)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Storage paths (can be overridden by env, e.g., DATA_DIR=/data in Docker).
# BASE_DIR is backend/app — the same base the routers derive — so the static
# mounts below serve the exact directories the routers write into.
# (Previously BASE_DIR went one dirname too high, to backend/, so with
# DATA_DIR unset, /images and /static/synthetic pointed at different folders
# than the upload/generate endpoints used.)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_DATA_DIR = os.path.join(BASE_DIR, "data")
DATA_DIR = os.getenv("DATA_DIR", DEFAULT_DATA_DIR)
UPLOAD_DIR = os.path.join(DATA_DIR, "uploaded_images")
SYN_OUT_DIR = os.path.join(DATA_DIR, "synth_outputs")

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(SYN_OUT_DIR, exist_ok=True)

# Routers
app.include_router(ocr_router)
app.include_router(synthetic_router)

# Static mounts for data
app.mount("/images", StaticFiles(directory=UPLOAD_DIR), name="images")
app.mount("/static/synthetic", StaticFiles(directory=SYN_OUT_DIR), name="synthetic")

# Serve compiled frontend (if provided via env FRONTEND_DIST)
FRONTEND_DIST = os.getenv("FRONTEND_DIST")
if FRONTEND_DIST and os.path.isdir(FRONTEND_DIST):
    app.mount("/", StaticFiles(directory=FRONTEND_DIST, html=True), name="frontend")


@app.get("/")
def root():
    """Health/info endpoint listing the API route prefixes.

    NOTE(review): when FRONTEND_DIST is mounted above, that "/" mount is
    registered first and takes precedence, so this JSON response is only
    reachable in API-only deployments — confirm that is intended.
    """
    return {"message": "Unified backend is running", "routes": ["/api/ocr", "/api/synthetic"]}


# Legacy compatibility (optional):
# If you want to keep old OCR paths working without frontend changes,
# you can import and map handlers or create thin wrappers here.
# For now, keep frontend updates in a later step.
backend/app/services/annotations.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+ from typing import Tuple, Dict, List
6
+
7
+
8
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars to native Python numbers.

    The stock encoder raises TypeError on NumPy scalar types, which can
    appear in annotation payloads built from pandas/NumPy data.
    """

    def default(self, obj):
        # np.floating / np.integer are the abstract bases for every scalar
        # width (float16/32/64, int8..int64, platform ints) — broader than
        # the original float32/float64 + int32/int64 checks.
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        return super().default(obj)
15
+
16
+
17
def load_annotations(path: str) -> Dict:
    """Read annotations from a JSON file; a missing file yields an empty dict."""
    try:
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except FileNotFoundError:
        return {}
22
+
23
+
24
def save_annotations(path: str, data: Dict):
    """Persist annotations twice: as JSON next to *path* and as CSV at *path*."""
    # The JSON copy lives beside the CSV with the extension swapped.
    json_path = path.replace('.csv', '.json')
    with open(json_path, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2, cls=CustomJSONEncoder)

    # Mirror the same data into the CSV used by the OCR endpoints.
    save_annotations_to_csv(path, data)
33
+
34
+
35
def load_annotations_from_csv(csv_file: str, image_folder: str) -> Tuple[Dict, List[str], List[str]]:
    """Load annotations from CSV and split referenced images into present/missing.

    Returns ``(annotations, valid_images, missing_images)`` where annotations
    maps filename -> {'extracted_text', 'validated_text'} for images that
    exist inside *image_folder*. A missing CSV yields three empty containers.

    Raises ValueError if the CSV lacks an 'image_filename' column.
    """
    if not os.path.exists(csv_file):
        return {}, [], []

    df = pd.read_csv(csv_file, encoding='utf-8-sig')

    if 'image_filename' not in df.columns:
        raise ValueError("CSV must contain 'image_filename' column.")

    annotations: Dict[str, Dict[str, str]] = {}
    valid_images: List[str] = []
    missing_images: List[str] = []

    for _, row in df.iterrows():
        raw_name = row['image_filename']
        # Blank cells come back as float NaN; skip such rows rather than
        # crashing os.path.join with a non-string.
        if pd.isna(raw_name):
            continue
        filename = str(raw_name)
        image_path = os.path.join(image_folder, filename)
        if not os.path.exists(image_path):
            missing_images.append(filename)
            continue

        # pd.isna guards stop empty cells (NaN) from being stringified as
        # the literal text "nan", which the original code stored.
        extracted = row.get('extracted_text', '')
        extracted = '' if pd.isna(extracted) else str(extracted)
        validated = row.get('validated_text')
        # Missing/empty validation falls back to the extracted text, matching
        # the original intent for an absent 'validated_text' column.
        validated = extracted if validated is None or pd.isna(validated) else str(validated)

        annotations[filename] = {
            'extracted_text': extracted,
            'validated_text': validated,
        }
        valid_images.append(filename)

    return annotations, valid_images, missing_images
61
+
62
+
63
def save_annotations_to_csv(csv_file: str, annotations: Dict[str, Dict[str, str]]):
    """Write annotations to *csv_file* with the canonical three-column layout.

    Creates the parent directory when needed. utf-8-sig adds a BOM so the
    Odia text displays correctly when the CSV is opened in Excel.
    """
    data = [
        {
            'image_filename': filename,
            'extracted_text': str(ann.get('extracted_text', '')),
            'validated_text': str(ann.get('validated_text', ''))
        }
        for filename, ann in annotations.items()
    ]
    # dirname('') is '' for a bare filename and os.makedirs('') raises, so
    # only create a directory when there actually is one.
    parent = os.path.dirname(csv_file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Explicit columns keep the header present even for an empty mapping,
    # matching the header the router bootstraps at startup.
    df = pd.DataFrame(data, columns=['image_filename', 'extracted_text', 'validated_text'])
    df.to_csv(csv_file, index=False, encoding='utf-8-sig')
75
+
backend/app/services/ocr_processor.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified backend OCR processor using Google Gemini
2
+
3
+ import os
4
+ import base64
5
+ import logging
6
+ import time
7
+ from typing import List, Dict, Optional
8
+
9
+ import google.generativeai as genai
10
+
11
# Configure process-wide logging at import time.
# NOTE(review): calling logging.basicConfig in a library module affects the
# whole process; consider moving this to the application entry point.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)


# Lowercase file extensions accepted by encode_image_to_base64 / get_mime_type.
SUPPORTED_IMAGE_TYPES = {"jpg", "jpeg", "png", "bmp", "webp", "tiff"}
20
+
21
+
22
def encode_image_to_base64(image_path: str) -> Optional[str]:
    """Return the file at *image_path* as a base64-encoded ASCII string.

    Returns None (after logging the reason) when the file is missing or
    cannot be read.
    """
    if not os.path.exists(image_path):
        logger.error(f"Image not found: {image_path}")
        return None
    try:
        with open(image_path, "rb") as fh:
            payload = base64.b64encode(fh.read()).decode("utf-8")
    except Exception as exc:
        logger.error(f"Failed to read or encode image {image_path}: {exc}")
        return None
    return payload
32
+
33
+
34
def get_mime_type(image_path: str) -> Optional[str]:
    """Map a filename's extension to its image MIME type, or None if unsupported.

    The extension is whatever follows the last '.' (case-insensitive); 'jpg'
    is normalised to the canonical 'jpeg' subtype.
    """
    suffix = image_path.rsplit(".", 1)[-1].lower()
    if suffix not in SUPPORTED_IMAGE_TYPES:
        logger.warning(f"Unsupported image format: {suffix}")
        return None
    subtype = "jpeg" if suffix == "jpg" else suffix
    return f"image/{subtype}"
40
+
41
+
42
def run_gemini_ocr(image_path: str, api_key: str, max_retries: int = 3) -> str:
    """Extract Odia text from a single image via Gemini 1.5 Flash.

    Args:
        image_path: Path to the image file to transcribe.
        api_key: Google Generative AI API key.
        max_retries: Number of attempts before giving up.

    Returns:
        The extracted text, or a bracketed placeholder string describing the
        failure. Always returns a str (never None).
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")

    base64_image = encode_image_to_base64(image_path)
    mime_type = get_mime_type(image_path)

    if base64_image is None or mime_type is None:
        return "[Image could not be processed]"

    prompt = (
        "Extract all visible Odia (ଓଡ଼ିଆ) text from the image accurately.\n"
        "Only output the Odia text content. Do not explain or translate anything.\n"
        "If no Odia text is found, return '[No Odia text found]'."
    )

    for attempt in range(max_retries):
        try:
            response = model.generate_content(
                [
                    prompt,
                    {
                        "mime_type": mime_type,
                        "data": base64_image
                    }
                ],
                generation_config={
                    # Low temperature: we want faithful transcription, not creativity.
                    "temperature": 0.2,
                    "max_output_tokens": 2048,
                    "top_p": 0.8,
                    "top_k": 40
                }
            )

            # response.text may be empty/None for blocked or empty completions.
            text = response.text.strip() if response.text else "[No text extracted]"
            logger.info(f"OCR complete for {os.path.basename(image_path)}")
            return text

        except Exception as e:
            logger.error(f"OCR attempt {attempt + 1} failed for {image_path}: {e}")
            if attempt == max_retries - 1:
                return f"[OCR failed after {max_retries} attempts: {str(e)}]"
            # Brief fixed backoff before the next attempt.
            time.sleep(1)

    # Reached only when max_retries < 1; honour the declared `-> str` return
    # type instead of silently falling through and returning None.
    return "[OCR skipped: max_retries must be >= 1]"
85
+
86
+
87
def batch_run_ocr(image_filenames: List[str], image_folder: str, api_key: str) -> Dict[str, str]:
    """Run Gemini OCR over every listed image and collect per-file results.

    Missing files are recorded with a placeholder string instead of aborting
    the whole batch.
    """
    results: Dict[str, str] = {}
    logger.info(f"Starting batch OCR on {len(image_filenames)} images.")

    for name in image_filenames:
        path = os.path.join(image_folder, name)
        if os.path.exists(path):
            results[name] = run_gemini_ocr(path, api_key)
        else:
            logger.error(f"Image not found: {path}")
            results[name] = "[Image file not found]"

    logger.info("Batch OCR complete.")
    return results
102
+
backend/app/services/synthetic/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthetic Text Generator Package (moved under unified backend services)
3
+ """
4
+
5
+ __version__ = "1.0.0"
6
+
7
+ from .config import ENHANCED_DEFAULT_PARAMS
8
+ from .core import (
9
+ generate_enhanced_sanskrit_samples,
10
+ generate_comprehensive_dataset,
11
+ generate_ultra_realistic_samples,
12
+ )
13
+ from .huggingface_processor import HuggingFaceDatasetProcessor
14
+
15
+ __all__ = [
16
+ "ENHANCED_DEFAULT_PARAMS",
17
+ "generate_enhanced_sanskrit_samples",
18
+ "generate_comprehensive_dataset",
19
+ "generate_ultra_realistic_samples",
20
+ "HuggingFaceDatasetProcessor",
21
+ ]
22
+
backend/app/services/synthetic/backgrounds.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Background generation module for creating realistic paper textures and backgrounds
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import numpy as np
8
+ from PIL import Image
9
+ from typing import Dict
10
+ import logging
11
+ from .effects import AdvancedImageEffects
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def create_enhanced_background(width: int, height: int, style: str, params: Dict) -> Image.Image:
    """Create a width x height RGB background in one of four paper styles.

    If params['image_dir'] points at a folder containing images, a random one
    is resized and returned instead of a procedural texture. Otherwise a base
    colour for `style` ("lined_paper", "old_paper", "birch"; anything else
    falls through to parchment) is degraded with fibre texture, noise, stains
    and per-style artefacts whose strength comes from `params`.

    NOTE(review): several spots subtract a uint8 noise array directly from a
    uint8 background (`background - noise`); NumPy wraps around *before*
    np.clip runs, which can turn dark pixels bright. Also
    np.random.randint(0, 0) raises ValueError when a noise param is 0 --
    confirm both are intended.
    """
    # Prefer a real background image when a directory of them is supplied.
    if params.get("image_dir") and os.path.exists(params["image_dir"]):
        image_files = [f for f in os.listdir(params["image_dir"]) if f.lower().endswith((".png", ".jpg", ".jpeg"))]
        if image_files:
            img_path = os.path.join(params["image_dir"], random.choice(image_files))
            try:
                bg_img = Image.open(img_path).convert('RGB')
                bg_img = bg_img.resize((width, height), Image.LANCZOS)
                return bg_img
            except Exception as e:
                # Fall through to procedural generation on load failure.
                logger.error(f"Error loading background image {img_path}: {e}")

    # Paper-fibre texture, subtracted from the base colour in every style.
    if params.get('fiber_density', 0) > 0:
        fiber_texture = AdvancedImageEffects.simulate_paper_fiber_texture(width, height, params['fiber_density'])
    else:
        fiber_texture = np.zeros((height, width, 3), dtype=np.uint8)

    if style == "lined_paper":
        # Tan base with horizontal ruled lines, speckle noise and round stains.
        background = np.ones((height, width, 3), dtype=np.uint8) * [210, 180, 140]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        line_spacing = random.randint(15, 25)
        for y in range(0, height, line_spacing):
            line_width = random.randint(1, 2)
            darkness = random.randint(6, 20) * params["texture"]
            if y + line_width < height:
                background[y:y+line_width, :, :] = np.clip(background[y:y+line_width, :, :] - darkness, 0, 255)
        noise = np.random.randint(0, int(15 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)
        stain_count = int(random.randint(2, 4) * params["stains"])
        for _ in range(stain_count):
            x = random.randint(0, width-100)
            y = random.randint(0, height-100)
            size = random.randint(20, 60)
            darkness = random.randint(8, 25) * params["stain_intensity"]
            # Radial falloff mask with random per-pixel irregularity.
            stain_mask = np.zeros((size, size), dtype=np.float32)
            center = size // 2
            for i in range(size):
                for j in range(size):
                    dist = np.sqrt((i - center)**2 + (j - center)**2)
                    if dist < center:
                        stain_mask[i, j] = (1 - dist / center) * np.random.uniform(0.4, 1.0)
            # Clamp the stain window to the canvas.
            end_y = min(y + size, height)
            end_x = min(x + size, width)
            actual_size_y = end_y - y
            actual_size_x = end_x - x
            if actual_size_y > 0 and actual_size_x > 0:
                stain_region = stain_mask[:actual_size_y, :actual_size_x]
                for c in range(3):
                    # NOTE(review): stain_intensity is applied twice (already
                    # folded into `darkness` above) -- confirm intended.
                    background[y:end_y, x:end_x, c] = np.clip(
                        background[y:end_y, x:end_x, c] - darkness * stain_region * params["stain_intensity"], 0, 255
                    )

    elif style == "old_paper":
        # Aged yellowish base with darkened, de-blued page edges.
        background = np.ones((height, width, 3), dtype=np.uint8) * [236, 222, 181]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        noise = np.random.randint(0, int(12 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)
        edge_width = width // 10
        for i in range(edge_width):
            # Aging strength decays towards the interior of the page.
            factor = (edge_width - i) / edge_width * 15 * params["aging"]
            aging_noise = np.random.uniform(0.5, 1.5, (height, width))
            # Only the blue channel (index 2) is reduced -> yellow tint on all four edges.
            if i < height:
                background[i, :, 2] = np.clip(background[i, :, 2] - factor * aging_noise[i, :], 0, 255)
            if height - i - 1 >= 0:
                background[height-i-1, :, 2] = np.clip(background[height-i-1, :, 2] - factor * aging_noise[height-i-1, :], 0, 255)
            if i < width:
                background[:, i, 2] = np.clip(background[:, i, 2] - factor * aging_noise[:, i], 0, 255)
            if width - i - 1 >= 0:
                background[:, width-i-1, 2] = np.clip(background[:, width-i-1, 2] - factor * aging_noise[:, width-i-1], 0, 255)

    elif style == "birch":
        # Pale base with scattered soft blotches of brightness variation.
        background = np.ones((height, width, 3), dtype=np.uint8) * [235, 225, 215]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        noise = np.random.randint(0, int(10 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)
        variation_count = int(150 * params["texture"])
        for _ in range(variation_count):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            size = random.randint(10, 25)
            variation = random.randint(-6, 6) * params["texture"]
            for i in range(-size, size):
                for j in range(-size, size):
                    dist = np.sqrt(i*i + j*j)
                    if dist <= size:
                        # Random per-pixel shape factor gives irregular blob edges.
                        shape_factor = np.random.uniform(0.7, 1.3)
                        if dist <= size * shape_factor:
                            yi, xi = y + i, x + j
                            if 0 <= yi < height and 0 <= xi < width:
                                background[yi, xi, :] = np.clip(background[yi, xi, :] + variation, 0, 255)

    else:  # parchment (default for any unrecognised style string)
        # Warm base with fine grain-modulated speckle.
        background = np.ones((height, width, 3), dtype=np.uint8) * [230, 215, 185]
        background = np.clip(background.astype(np.float32) - fiber_texture, 0, 255).astype(np.uint8)
        variation_count = int(400 * params["texture"])
        for _ in range(variation_count):
            x = random.randint(0, width-1)
            y = random.randint(0, height-1)
            size = random.randint(5, 12)
            variation = random.randint(-7, 7) * params["texture"]
            for i in range(-size, size):
                for j in range(-size, size):
                    dist = np.sqrt(i*i + j*j)
                    if dist <= size:
                        # Sinusoidal modulation imitates parchment grain direction.
                        grain_factor = 1 + 0.3 * np.sin(j * 0.5) * np.cos(i * 0.3)
                        if dist <= size * grain_factor:
                            yi, xi = y + i, x + j
                            if 0 <= yi < height and 0 <= xi < width:
                                background[yi, xi, :] = np.clip(background[yi, xi, :] + variation, 0, 255)
        noise = np.random.randint(0, int(8 * params["noise"]), (height, width, 3), dtype=np.uint8)
        background = np.clip(background - noise, 0, 255).astype(np.uint8)

    return Image.fromarray(background)
129
+
backend/app/services/synthetic/config.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration parameters for the Synthetic Text Generator
3
+ """
4
+
5
# Default knobs for the synthetic generator. Intensity-style values are
# 0.0-1.0 multipliers unless noted otherwise; callers typically merge partial
# overrides on top of this dict.
ENHANCED_DEFAULT_PARAMS = {
    # Canvas size (px) and number of base images sampled per run.
    'width': 400,
    'height': 320,
    'base_images': 1,

    # Font location.
    # NOTE(review): the repo's content/static ships NotoSansOriya-*.ttf
    # variants; confirm the Condensed file below actually exists there.
    'font_dir': './content/static',
    'font': 'NotoSansOriya_Condensed-Regular.ttf',

    # Background degradation strengths.
    'noise': 0.7,
    'aging': 0.6,
    'texture': 0.7,
    'stains': 0.6,
    'stain_intensity': 0.5,

    # Text layout jitter.
    'word_position': 0.6,
    'ink_color': 0.5,
    'line_spacing': 0.4,
    'baseline': 0.3,
    'word_angle': 0.0,

    # Classic post-processing transforms.
    'apply_transforms': True,
    'all_transforms': False,
    'rotation_max': 5.0,          # degrees
    'brightness_var': 0.2,
    'contrast_var': 0.2,
    'noise_min': 0.01,
    'noise_max': 0.05,
    'blur_min': 0.5,
    'blur_max': 1.0,

    # Advanced physical effects (folds, bleed, optics, scanner).
    'fold_intensity': 0.3,
    'bleed_intensity': 0.3,
    'bleed_radius': 3,            # px
    'corner_displacement': 20,    # px, perspective warp
    'morph_operation': 'mixed',
    'morph_kernel_size': 3,
    'aging_intensity': 0.5,
    'fiber_density': 0.5,
    'enable_advanced_effects': True,
    'advanced_effect_probability': 0.7,
    'shadow_angle': 45,           # degrees
    'shadow_intensity': 0.4,
    'lens_distortion_strength': 0.2,
    'scanner_artifacts': True,
    'compression_quality': 85,    # JPEG quality for scanner simulation
    'fold_probability': 0.4,
    'crease_probability': 0.3,
    'perspective_probability': 0.5,
    'shadow_probability': 0.6,

    # Runtime behaviour.
    'use_multiprocessing': False,
    'num_processes': 4,
    'enable_caching': True,
    'debug_mode': False,
    'image_dir': ''               # optional folder of real background images
    }
61
+
backend/app/services/synthetic/core.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core module containing main generation functions
3
+ """
4
+
5
+ import os
6
+ import random
7
+ import logging
8
+ from typing import Dict, List, Optional
9
+ from PIL import Image
10
+ from .config import ENHANCED_DEFAULT_PARAMS
11
+ from .text_renderer import render_enhanced_sanskrit
12
+ from .transformations import (
13
+ apply_enhanced_postprocessing,
14
+ create_comprehensive_effect_combinations,
15
+ apply_systematic_postprocessing,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
def generate_enhanced_sanskrit_samples(
    text: str,
    font_path: str = None,
    output_dir: str = None,
    params: Dict = None,
) -> Optional[List[Image.Image]]:
    """Render `text` on randomly sampled paper styles, optionally with transforms.

    Args:
        text: The text to render.
        font_path: Explicit font file; defaults to params font_dir/font.
        output_dir: When given, images are saved there and the function
            returns None; when None, the generated images are returned.
        params: Partial overrides merged on top of ENHANCED_DEFAULT_PARAMS.

    Returns:
        List of PIL images when output_dir is None, otherwise None
        (images were persisted to disk). Returns [] / None respectively
        when the font file is missing.
    """
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    if font_path is None:
        font_path = os.path.join(params['font_dir'], params['font'])

    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        # Match the normal return contract for each mode.
        return [] if output_dir is None else None

    styles = ["lined_paper", "old_paper", "birch", "parchment"]

    # Per-style ink colour (RGB).
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    width, height = params['width'], params['height']
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Sample styles with replacement, then count occurrences per style.
    sampled_styles = random.choices(styles, k=params['base_images'])
    style_counts = {style: sampled_styles.count(style) for style in styles}
    logger.info(f"Randomly selected styles: {style_counts}")

    base_images = []

    for style, count in style_counts.items():
        for i in range(count):
            font_size = random.randint(12, 18)
            output_path = (
                os.path.join(output_dir, f"enhanced_sanskrit_{style}_{i+1}.png")
                if output_dir
                else None
            )

            img = render_enhanced_sanskrit(
                text=text,
                font_path=font_path,
                output_path=output_path,
                width=width,
                height=height,
                font_size=font_size,
                style=style,
                ink_color=ink_colors[style],
                params=params,
            )

            if img:
                base_images.append(img)
                # Transformed variants are only produced when persisting to disk.
                if params['apply_transforms'] and output_dir:
                    base_filename = f"enhanced_sanskrit_{style}_{i+1}"
                    transformed_images = apply_enhanced_postprocessing(
                        img, output_dir, base_filename, params
                    )
                    # Index 0 is the untransformed base; keep variants only.
                    base_images.extend(transformed_images[1:])

    return base_images if output_dir is None else None
89
+
90
+
91
def generate_comprehensive_dataset(
    text: str, font_path: str = None, output_dir: str = None, params: Dict = None
) -> List[Image.Image]:
    """Render `text` on every base style and expand each with all effect combos.

    Args:
        text: The text to render.
        font_path: Explicit font file; defaults to params font_dir/font.
        output_dir: Optional folder; base images and variants are saved there.
        params: Partial overrides merged on top of ENHANCED_DEFAULT_PARAMS.

    Returns:
        All generated PIL images (base images plus every effect-combination
        variant); [] when the font file is missing.
    """
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    if font_path is None:
        font_path = os.path.join(params['font_dir'], params['font'])

    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        return []

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Pre-built list of effect combinations applied to every base image.
    effect_combinations = create_comprehensive_effect_combinations()

    styles = ["lined_paper", "old_paper", "birch", "parchment"]
    # Per-style ink colour (RGB).
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    width, height = params['width'], params['height']
    all_generated_images: List[Image.Image] = []

    logger.info(
        f"Generating comprehensive dataset with {len(effect_combinations)} effect combinations"
    )

    for style in styles:
        font_size = random.randint(14, 18)
        output_path = os.path.join(output_dir, f"base_{style}.png") if output_dir else None

        base_image = render_enhanced_sanskrit(
            text=text,
            font_path=font_path,
            output_path=output_path,
            width=width,
            height=height,
            font_size=font_size,
            style=style,
            ink_color=ink_colors[style],
            params=params,
        )

        if base_image:
            all_generated_images.append(base_image)
            for combo_idx, effect_combo in enumerate(effect_combinations):
                base_filename = f"comprehensive_{style}_{combo_idx:03d}"
                enhanced_images = apply_systematic_postprocessing(
                    base_image, output_dir, base_filename, params, effect_combo
                )
                # Index 0 is the unmodified base; keep variants only.
                all_generated_images.extend(enhanced_images[1:])

    logger.info(f"Total images generated: {len(all_generated_images)}")
    return all_generated_images
+
154
+
155
def generate_ultra_realistic_samples(
    text: str, output_dir: str = None, style_focus: str = None, params: Dict = None
) -> List[Image.Image]:
    """Generate heavily degraded ("ultra realistic") samples of `text`.

    Args:
        text: The text to render.
        output_dir: Optional folder where variants are saved.
        style_focus: Restrict generation to one style; all four when None.
        params: Partial overrides merged on top of ENHANCED_DEFAULT_PARAMS.

    Returns:
        All generated variant images; [] when the font file is missing.
    """
    # Merge user overrides onto the defaults, mirroring the sibling
    # generators in this module; previously a partial params dict would
    # raise KeyError on missing keys such as 'font_dir'.
    if params is None:
        params = ENHANCED_DEFAULT_PARAMS.copy()
    else:
        params = {**ENHANCED_DEFAULT_PARAMS, **params}

    # Crank the degradation knobs above the defaults for extra realism.
    ultra_realistic_params = {
        **params,
        'fold_intensity': 0.4,
        'bleed_intensity': 0.35,
        'shadow_intensity': 0.5,
        'lens_distortion_strength': 0.15,
        'aging_intensity': 0.7,
        'fiber_density': 0.6,
        'texture': 0.8,
        'noise': 0.6,
        'stains': 0.7,
        'stain_intensity': 0.6,
    }

    font_path = os.path.join(ultra_realistic_params['font_dir'], ultra_realistic_params['font'])
    # Fail fast like the sibling generators when the font is missing.
    if not os.path.exists(font_path):
        logger.error(f"Font not found at {font_path}")
        return []

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Hand-picked effect stacks that read as heavily handled documents.
    ultra_combinations = [
        ["fold_crease", "ink_bleed", "shadow_cast", "scanner_artifacts"],
        ["perspective", "morphological", "lens_distortion", "washboard"],
        ["cylinder", "scanner_artifacts", "lens_distortion", "shadow_cast"],
        ["fold_crease", "ink_bleed", "morphological", "perspective"],
        [
            "fold_crease",
            "ink_bleed",
            "perspective",
            "shadow_cast",
            "morphological",
            "scanner_artifacts",
            "lens_distortion",
        ],
        ["perspective", "lens_distortion", "shadow_cast", "cylinder"],
        ["washboard", "ink_bleed", "morphological", "fold_crease"],
    ]

    styles = ["lined_paper", "old_paper", "birch", "parchment"] if not style_focus else [style_focus]
    # Per-style ink colour (RGB).
    ink_colors = {
        "lined_paper": (60, 30, 10),
        "old_paper": (20, 20, 20),
        "birch": (50, 20, 10),
        "parchment": (10, 10, 10),
    }

    all_images: List[Image.Image] = []

    for style in styles:
        base_image = render_enhanced_sanskrit(
            text=text,
            font_path=font_path,
            output_path=None,
            width=ultra_realistic_params['width'],
            height=ultra_realistic_params['height'],
            font_size=random.randint(14, 18),
            style=style,
            ink_color=ink_colors[style],
            params=ultra_realistic_params,
        )

        if base_image:
            for combo_idx, effect_combo in enumerate(ultra_combinations):
                base_filename = f"ultra_realistic_{style}_{combo_idx:02d}"
                enhanced_images = apply_systematic_postprocessing(
                    base_image, output_dir, base_filename, ultra_realistic_params, effect_combo
                )
                # Index 0 is the unmodified base; keep variants only.
                all_images.extend(enhanced_images[1:])

    return all_images
230
+
backend/app/services/synthetic/effects.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Advanced image effects for synthetic text generation
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import random
8
+ import logging
9
+ from typing import List, Tuple
10
+ from noise import pnoise2
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class EffectPlugin:
    """Base class for pluggable image effects.

    Subclasses override apply() (and optionally validate_params()) to
    implement a named, parameterised transformation on an RGB array.
    """

    def __init__(self, name: str, params: dict):
        # Store the effect identity and its configuration, then run the
        # subclass validation hook (a no-op by default).
        self.name = name
        self.params = params
        self.validate_params()

    def apply(self, image: np.ndarray) -> np.ndarray:
        """Apply the effect to *image*; must be overridden by subclasses."""
        raise NotImplementedError

    def validate_params(self):
        """Optional hook for subclasses to validate self.params."""
        pass
26
+
27
+
28
class AdvancedImageEffects:
    """Namespace of static, best-effort image degradation effects.

    Methods take and return uint8 RGB numpy arrays; on any internal error
    they log and return the input unchanged.
    """

    @staticmethod
    def generate_perlin_noise(width: int, height: int, scale: float = 0.1, octaves: int = 4) -> np.ndarray:
        """Return a (height, width) float map of 2-D Perlin noise (roughly [-1, 1]).

        NOTE(review): pure-Python double loop of pnoise2 calls -- O(w*h);
        slow for large canvases.
        """
        noise_map = np.zeros((height, width))
        for i in range(height):
            for j in range(width):
                noise_map[i][j] = pnoise2(i * scale, j * scale, octaves=octaves)
        return noise_map

    @staticmethod
    def simulate_paper_fiber_texture(width: int, height: int, fiber_density: float = 0.5) -> np.ndarray:
        """Build a 3-channel uint8 fibre texture; random speckle as fallback."""
        try:
            # Coarse + fine Perlin layers, rescaled to [0, 20*density].
            base_texture = AdvancedImageEffects.generate_perlin_noise(width, height, 0.02, 4)
            fine_texture = AdvancedImageEffects.generate_perlin_noise(width, height, 0.1, 2)
            combined = base_texture * 0.7 + fine_texture * 0.3
            combined = ((combined + 1) / 2) * fiber_density * 20
            texture = np.stack([combined, combined, combined], axis=2)
            return texture.astype(np.uint8)
        except Exception as e:
            logger.warning(f"Failed to generate Perlin noise texture: {e}")
            return np.random.randint(0, int(20 * fiber_density), (height, width, 3), dtype=np.uint8)

    @staticmethod
    def simulate_fold_crease(image: np.ndarray, fold_lines: List[Tuple], fold_intensity: float = 0.5) -> np.ndarray:
        """Darken the image along each (x1, y1, x2, y2) fold line with a
        Gaussian profile, plus a one-sided shadow across the line."""
        try:
            height, width = image.shape[:2]
            result = image.copy()
            for fold_line in fold_lines:
                y_coords, x_coords = np.ogrid[:height, :width]
                x1, y1, x2, y2 = fold_line
                line_length = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
                if line_length > 0:
                    # Perpendicular distance of every pixel to the fold line.
                    distances = np.abs((y2 - y1) * x_coords - (x2 - x1) * y_coords + x2 * y1 - y2 * x1) / line_length
                    fold_width = min(width, height) * 0.1
                    fold_profile = np.exp(-0.5 * (distances / fold_width)**2)
                    fold_effect = fold_profile * fold_intensity * 40
                    # Sign of the cross product selects one side of the line.
                    shadow_mask = (y_coords - y1) * (x2 - x1) - (x_coords - x1) * (y2 - y1) > 0
                    shadow_effect = fold_profile * shadow_mask * fold_intensity * 20
                    result = result.astype(np.float32)
                    result[:, :, 0] -= fold_effect + shadow_effect
                    result[:, :, 1] -= fold_effect + shadow_effect
                    result[:, :, 2] -= fold_effect + shadow_effect
                    result = np.clip(result, 0, 255).astype(np.uint8)
            return result
        except Exception as e:
            logger.error(f"Error in fold/crease simulation: {e}")
            return image

    @staticmethod
    def simulate_ink_bleed(image: np.ndarray, bleed_intensity: float = 0.3, bleed_radius: int = 3) -> np.ndarray:
        """Darken a blurred dilation halo around dark (text) pixels."""
        try:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            # Pixels darker than 200 are treated as ink.
            _, text_mask = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
            kernel_size = max(1, int(bleed_radius * 2 + 1))
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
            bleeding_mask = cv2.dilate(text_mask, kernel, iterations=1)
            bleed_effect = cv2.GaussianBlur(bleeding_mask.astype(np.float32), (kernel_size, kernel_size), 0)
            bleed_effect = bleed_effect * bleed_intensity / 255.0
            result = image.astype(np.float32)
            for c in range(3):
                result[:, :, c] = result[:, :, c] * (1 - 0.5 * bleed_effect)
            return np.clip(result, 0, 255).astype(np.uint8)
        except Exception as e:
            logger.error(f"Error in ink bleed simulation: {e}")
            return image

    @staticmethod
    def apply_perspective_distortion(image: np.ndarray, corner_displacement: int = 20) -> np.ndarray:
        """Warp the image by randomly displacing its four corners
        (clamped to +/-10% outside the canvas)."""
        try:
            height, width = image.shape[:2]
            src_points = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
            dst_points = src_points.copy()
            for i in range(4):
                dst_points[i][0] += random.randint(-corner_displacement, corner_displacement)
                dst_points[i][1] += random.randint(-corner_displacement, corner_displacement)
            dst_points[:, 0] = np.clip(dst_points[:, 0], -width*0.1, width*1.1)
            dst_points[:, 1] = np.clip(dst_points[:, 1], -height*0.1, height*1.1)
            matrix = cv2.getPerspectiveTransform(src_points, dst_points)
            result = cv2.warpPerspective(
                image, matrix, (width, height), borderMode=cv2.BORDER_REPLICATE
            )
            return result
        except Exception as e:
            logger.error(f"Error in perspective distortion: {e}")
            return image

    @staticmethod
    def apply_shadow_effects(image: np.ndarray, shadow_angle: float = 45, shadow_intensity: float = 0.4) -> np.ndarray:
        """Apply a linear brightness gradient across the image along shadow_angle."""
        try:
            height, width = image.shape[:2]
            result = image.copy().astype(np.float32)
            angle_rad = np.radians(shadow_angle)
            x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
            shadow_factor = (np.cos(angle_rad) * x_coords / width + np.sin(angle_rad) * y_coords / height)
            shadow_factor = np.clip(shadow_factor, 0, 1)
            shadow_effect = 1 - shadow_factor * shadow_intensity
            for c in range(3):
                result[:, :, c] *= shadow_effect
            return np.clip(result, 0, 255).astype(np.uint8)
        except Exception as e:
            logger.error(f"Error in shadow effects: {e}")
            return image

    @staticmethod
    def apply_morphological_operations(image: np.ndarray, operation: str = 'mixed', kernel_size: int = 3) -> np.ndarray:
        """Apply one morphological op on the grayscale image and return it as RGB.

        'mixed' (or any unknown value) recurses with a randomly chosen op.
        NOTE(review): output is grayscale replicated to 3 channels -- colour
        information is discarded.
        """
        try:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
            if operation == 'erosion':
                processed = cv2.erode(gray, kernel, iterations=1)
            elif operation == 'dilation':
                processed = cv2.dilate(gray, kernel, iterations=1)
            elif operation == 'opening':
                processed = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
            elif operation == 'closing':
                processed = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
            else:
                ops = ['erosion', 'dilation', 'opening', 'closing']
                import random as _r
                chosen = _r.choice(ops)
                return AdvancedImageEffects.apply_morphological_operations(image, chosen, kernel_size)
            return cv2.cvtColor(processed, cv2.COLOR_GRAY2RGB)
        except Exception as e:
            logger.error(f"Error in morphological operations: {e}")
            return image

    @staticmethod
    def simulate_scanner_artifacts(image: np.ndarray, compression_quality: int = 85) -> np.ndarray:
        """Add horizontal scan lines, dust specks, and a JPEG round-trip."""
        try:
            height, width = image.shape[:2]
            result = image.copy()
            # Darkened horizontal lines at random spacing.
            for y in range(0, height, random.randint(8, 15)):
                intensity = random.randint(5, 15)
                if y < height:
                    result[y, :, :] = np.clip(result[y, :, :] - intensity, 0, 255)
            # Small dark dust squares.
            dust_count = random.randint(3, 8)
            for _ in range(dust_count):
                x = random.randint(0, width - 5)
                y = random.randint(0, height - 5)
                size = random.randint(2, 5)
                dust_intensity = random.randint(20, 40)
                result[y:y+size, x:x+size, :] = np.clip(result[y:y+size, x:x+size, :] - dust_intensity, 0, 255)
            # JPEG encode/decode round-trip introduces block artefacts.
            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compression_quality]
            _, encimg = cv2.imencode('.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR), encode_param)
            result = cv2.imdecode(encimg, cv2.IMREAD_COLOR)
            result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
            return result
        except Exception as e:
            logger.error(f"Error in scanner artifacts: {e}")
            return image

    @staticmethod
    def apply_lens_distortion(image: np.ndarray, strength: float = 0.2) -> np.ndarray:
        """Apply radial (barrel-style) distortion centred on the image."""
        try:
            height, width = image.shape[:2]
            center_x, center_y = width // 2, height // 2
            y_coords, x_coords = np.ogrid[:height, :width]
            distances = np.sqrt((x_coords - center_x)**2 + (y_coords - center_y)**2)
            max_distance = np.sqrt(center_x**2 + center_y**2)
            normalized_distances = distances / max_distance
            # Quadratic falloff: pixels further from centre are pulled inward more.
            distortion_factor = 1 + strength * normalized_distances**2
            map_x = ((x_coords - center_x) / distortion_factor + center_x).astype(np.float32)
            map_y = ((y_coords - center_y) / distortion_factor + center_y).astype(np.float32)
            result = cv2.remap(image, map_x, map_y, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
            return result
        except Exception as e:
            logger.error(f"Error in lens distortion: {e}")
            return image
196
+
197
+
198
def generate_random_fold_lines(image_size: Tuple[int, int], num_folds: int = None) -> List[Tuple]:
    """Produce random (x1, y1, x2, y2) endpoints for fold/crease simulation.

    Args:
        image_size: (width, height) bounds for the endpoints (inclusive).
        num_folds: Number of lines; a random 1-3 when None.
    """
    width, height = image_size
    count = random.randint(1, 3) if num_folds is None else num_folds
    return [
        (
            random.randint(0, width),
            random.randint(0, height),
            random.randint(0, width),
            random.randint(0, height),
        )
        for _ in range(count)
    ]
210
+
211
+
212
def safe_apply_effect(effect_func, image: np.ndarray, effect_name: str) -> np.ndarray:
    """Run *effect_func* on *image*; on any error, log and return the input unchanged."""
    try:
        result = effect_func(image)
    except Exception as exc:
        logger.error(f"Error applying {effect_name}: {exc}")
        return image
    return result
218
+
backend/app/services/synthetic/huggingface_processor.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face dataset processor for downloading datasets and generating synthetic text images
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ import pandas as pd
8
+ from typing import Dict, List, Optional
9
+ from urllib.parse import urlparse
10
+ import requests
11
+ from PIL import Image
12
+
13
+ from datasets import load_dataset
14
+ import datasets
15
+
16
+ from .config import ENHANCED_DEFAULT_PARAMS
17
+ from .text_renderer import render_enhanced_sanskrit
18
+ from .transformations import apply_enhanced_postprocessing
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class HuggingFaceDatasetProcessor:
    """Turn a text dataset (Hugging Face hub, URL, or local CSV) into synthetic images.

    Each row's text is rendered onto a styled paper background via
    ``render_enhanced_sanskrit``, optionally post-processed, written under
    ``<output_dir>/images/``, and indexed in ``<output_dir>/dataset.csv``.
    """

    def __init__(self, output_dir: str = "hf_dataset_output", params: Dict = None):
        """Create output folders and resolve rendering parameters.

        Args:
            output_dir: Root directory for generated images and the CSV index.
            params: Rendering/augmentation parameters; a copy of
                ENHANCED_DEFAULT_PARAMS is used when omitted so later
                mutation does not affect the module-level defaults.
        """
        self.output_dir = output_dir
        self.params = params if params else ENHANCED_DEFAULT_PARAMS.copy()
        self.image_dir = os.path.join(output_dir, "images")
        self.csv_path = os.path.join(output_dir, "dataset.csv")
        os.makedirs(self.image_dir, exist_ok=True)
        # RGB ink colour paired with each supported background style.
        self.ink_colors = {
            "lined_paper": (60, 30, 10),
            "old_paper": (20, 20, 20),
            "birch": (50, 20, 10),
            "parchment": (10, 10, 10),
        }

    def load_huggingface_dataset(self, dataset_name: str, config_name: str = None, split: str = None, streaming: bool = False):
        """Load a dataset from the Hugging Face hub.

        Returns the streaming dataset object when ``streaming`` is True,
        otherwise a pandas DataFrame — preferring the requested split, then
        'train', then the first available split. Returns None on any failure.
        """
        try:
            dataset = load_dataset(dataset_name, config_name, split=split, streaming=streaming)
            if streaming:
                return dataset
            else:
                if isinstance(dataset, datasets.DatasetDict):
                    if split:
                        df = dataset[split].to_pandas()
                    elif 'train' in dataset:
                        df = dataset['train'].to_pandas()
                    else:
                        # Fall back to whichever split the dataset exposes first.
                        first_split = list(dataset.keys())[0]
                        df = dataset[first_split].to_pandas()
                else:
                    df = dataset.to_pandas()
                return df
        except Exception as e:
            logger.error(f"Error loading Hugging Face dataset: {e}")
            return None

    def download_dataset_from_url(self, url: str, output_file: str = "dataset.csv") -> bool:
        """Download a CSV file from *url* into the output directory.

        Hugging Face dataset page URLs are rewritten to their raw-file
        equivalents; any other URL is fetched as-is. Returns True on success.
        """
        try:
            if "huggingface.co/datasets" in url:
                parsed = urlparse(url)
                dataset_path = parsed.path.strip('/')
                if "/blob/main/" in url:
                    # Web-UI file link -> raw file link.
                    raw_url = url.replace("/blob/main/", "/raw/main/")
                elif "/tree/main" in url:
                    # Directory link -> assume a top-level dataset.csv.
                    raw_url = url.replace("/tree/main", "/raw/main/dataset.csv")
                else:
                    raw_url = f"https://huggingface.co/{dataset_path}/raw/main/dataset.csv"
            else:
                raw_url = url
            # NOTE(review): no timeout is set on this request — consider adding one.
            response = requests.get(raw_url, stream=True)
            response.raise_for_status()
            file_path = os.path.join(self.output_dir, output_file)
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            return True
        except Exception as e:
            logger.error(f"Error downloading dataset: {e}")
            return False

    def load_dataset(self, file_path: str, text_column: str) -> Optional[pd.DataFrame]:
        """Read a CSV, trying several encodings, and drop empty text rows.

        Returns the cleaned DataFrame, or None (with a logged error) when no
        encoding works or *text_column* is missing.
        """
        try:
            encodings = ['utf-8', 'iso-8859-1', 'windows-1252', 'utf-16']
            df = None
            for encoding in encodings:
                try:
                    df = pd.read_csv(file_path, encoding=encoding)
                    break
                except UnicodeDecodeError:
                    continue
            if df is None:
                raise Exception("Could not load dataset with any supported encoding")
            if text_column not in df.columns:
                raise Exception(f"Column '{text_column}' not found. Available columns: {list(df.columns)}")
            initial_rows = len(df)  # NOTE(review): currently unused.
            df = df.dropna(subset=[text_column])
            df = df[df[text_column].str.strip() != ""]
            return df
        except Exception as e:
            logger.error(f"Error loading dataset: {e}")
            return None

    def generate_images_from_dataset(self, dataset_df: pd.DataFrame, text_column: str, max_samples: int = None) -> List[Dict]:
        """Render one image per dataset row and return per-row metadata dicts.

        Background styles rotate round-robin over the row index. Rows whose
        text is empty, whose rendering fails, or that raise an exception are
        skipped (and logged) rather than aborting the whole batch.
        """
        results = []
        if max_samples and max_samples < len(dataset_df):
            dataset_df = dataset_df.head(max_samples)
        styles = ["lined_paper", "old_paper", "birch", "parchment"]
        for idx, row in dataset_df.iterrows():
            try:
                text = str(row[text_column]).strip()
                if not text:
                    continue
                style = styles[idx % len(styles)]
                base_filename = f"text_image_{idx:06d}"
                image_filename = f"{base_filename}.png"
                image_path = os.path.join(self.image_dir, image_filename)
                img = render_enhanced_sanskrit(
                    text=text,
                    font_path=os.path.join(self.params['font_dir'], self.params['font']),
                    output_path=None,
                    width=self.params['width'],
                    height=self.params['height'],
                    font_size=14,
                    style=style,
                    ink_color=self.ink_colors[style],
                    params=self.params,
                )
                if img is None:
                    continue
                if self.params.get('apply_transforms', True):
                    # Keep only the last (combined/most-transformed) variant when
                    # any transform was produced; otherwise keep the original.
                    transformed_images = apply_enhanced_postprocessing(img, None, base_filename, self.params)
                    final_img = transformed_images[-1] if len(transformed_images) > 1 else img
                else:
                    final_img = img
                final_img.save(image_path)
                result = {
                    'row_index': idx,
                    'image_path': os.path.relpath(image_path, self.output_dir),
                    'text': text,
                    'style': style,
                    'image_filename': image_filename,
                }
                # Carry any extra source columns through to the output record.
                for col in dataset_df.columns:
                    if col != text_column:
                        result[col] = row[col]
                results.append(result)
            except Exception as e:
                logger.error(f"Error processing row {idx}: {e}")
                continue
        return results

    def save_results_csv(self, results: List[Dict], additional_info: Dict = None):
        """Write the per-row records to dataset.csv (and optional metadata.txt).

        Important columns are moved to the front of the CSV; *additional_info*
        key/value pairs, when given, are written to a sidecar metadata file.
        """
        try:
            if not results:
                return
            df = pd.DataFrame(results)
            important_cols = ['image_path', 'text', 'style', 'image_filename']
            other_cols = [col for col in df.columns if col not in important_cols]
            df = df[important_cols + other_cols]
            df.to_csv(self.csv_path, index=False, encoding='utf-8')
            if additional_info:
                metadata_path = os.path.join(self.output_dir, "metadata.txt")
                with open(metadata_path, 'w', encoding='utf-8') as f:
                    f.write("Dataset Processing Metadata\n")
                    f.write("=" * 30 + "\n")
                    for key, value in additional_info.items():
                        f.write(f"{key}: {value}\n")
        except Exception as e:
            logger.error(f"Error saving results: {e}")

    def process_huggingface_dataset(self, dataset_identifier: str, text_column: str, max_samples: int = None, config_name: str = None, split: str = None) -> bool:
        """End-to-end pipeline for a hub dataset name or a downloadable URL.

        Loads the dataset (hub identifier first, then URL download fallback),
        renders images, and saves the CSV index plus metadata. Returns True
        only when at least one row was processed successfully.
        """
        try:
            df = None
            if not dataset_identifier.startswith("http"):
                df = self.load_huggingface_dataset(dataset_identifier, config_name=config_name, split=split)
            if df is None and dataset_identifier.startswith("http"):
                dataset_file = "downloaded_dataset.csv"
                if self.download_dataset_from_url(dataset_identifier, dataset_file):
                    dataset_path = os.path.join(self.output_dir, dataset_file)
                    df = self.load_dataset(dataset_path, text_column)
            if df is None:
                return False
            # Hub-loaded frames have not been column-checked yet, so verify here.
            if text_column not in df.columns:
                return False
            results = self.generate_images_from_dataset(df, text_column, max_samples)
            if not results:
                return False
            additional_info = {
                "dataset_identifier": dataset_identifier,
                "config_name": config_name,
                "split": split,
                "text_column": text_column,
                "original_rows": len(df),
                "processed_rows": len(results),
                "max_samples": max_samples or "all",
                "output_directory": self.output_dir,
                "image_directory": self.image_dir,
            }
            self.save_results_csv(results, additional_info)
            return True
        except Exception as e:
            logger.error(f"Error in dataset processing workflow: {e}")
            return False

    def process_local_csv(self, csv_path: str, text_column: str, max_samples: int = None) -> bool:
        """End-to-end pipeline for a CSV already on disk.

        Mirrors ``process_huggingface_dataset`` but skips download/hub logic.
        Returns True only when at least one row was processed successfully.
        """
        try:
            df = self.load_dataset(csv_path, text_column)
            if df is None:
                return False
            results = self.generate_images_from_dataset(df, text_column, max_samples)
            if not results:
                return False
            additional_info = {
                "source_file": csv_path,
                "text_column": text_column,
                "original_rows": len(df),
                "processed_rows": len(results),
                "max_samples": max_samples or "all",
                "output_directory": self.output_dir,
                "image_directory": self.image_dir,
            }
            self.save_results_csv(results, additional_info)
            return True
        except Exception as e:
            logger.error(f"Error processing local CSV: {e}")
            return False
227
+ return False
228
+
backend/app/services/synthetic/text_renderer.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text rendering module for Sanskrit/Oriya text with various effects
3
+ """
4
+
5
+ import os
6
+ import math
7
+ import random
8
+ import logging
9
+ from typing import Dict, Tuple, Optional
10
+ import numpy as np
11
+ from PIL import Image, ImageDraw, ImageFont
12
+ from .backgrounds import create_enhanced_background
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def render_enhanced_sanskrit(
    text: str,
    font_path: str,
    output_path: str,
    width: int,
    height: int,
    font_size: int,
    style: str,
    ink_color: Tuple[int, int, int],
    params: Dict,
) -> Optional[Image.Image]:
    """Render *text* word-by-word onto a styled background with jitter.

    Words are wrapped to the available width, each line is centred, and
    every word gets small random offsets, ink-colour variation, and
    (optionally) per-word rotation to mimic handwriting/print irregularity.

    Args:
        text: Source text; newlines are collapsed to spaces before wrapping.
        font_path: Path to a TrueType font file.
        output_path: Where to save the result; pass None to skip saving.
        width, height: Canvas size in pixels.
        font_size: Font size in points.
        style: Background style name understood by create_enhanced_background.
        ink_color: Base RGB ink colour; per-word variation is added on top.
        params: Jitter magnitudes — keys "baseline", "word_position",
            "ink_color", "word_angle", "line_spacing" are read here.

    Returns:
        The rendered PIL image, or None if font loading/rendering failed.
    """
    img = create_enhanced_background(width, height, style, params)
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.truetype(font_path, font_size)
        words = text.strip().replace('\n', ' ').split()
        # Random top margin so pages don't all start at the same height.
        y_position = random.randint(25, 75)
        margin = 25
        available_width = width - 2 * margin
        space_width = draw.textlength(" ", font=font)

        # Greedy word wrap: accumulate words until the line would overflow.
        current_line = []
        current_line_width = 0
        all_lines = []
        for word in words:
            word_width = draw.textlength(word, font=font)
            if current_line and current_line_width + space_width + word_width > available_width:
                all_lines.append(current_line)
                current_line = [word]
                current_line_width = word_width
            else:
                if current_line:
                    current_line_width += space_width + word_width
                else:
                    current_line_width = word_width
                current_line.append(word)
        if current_line:
            all_lines.append(current_line)

        for line in all_lines:
            line_text = " ".join(line)
            line_width = draw.textlength(line_text, font=font)
            # Centre the line horizontally; jitter its baseline vertically.
            x_position = (width - line_width) // 2
            baseline_offset = random.randint(-2, 2) * params["baseline"]
            y_line_position = y_position + baseline_offset
            # Stop rendering once the next line would spill past the bottom margin.
            if y_line_position + font_size > height - margin:
                break
            x_word_position = x_position
            for word in line:
                # Per-word positional jitter and slight ink-colour variation.
                word_x_offset = int(random.uniform(-1.5, 1.5) * params["word_position"])
                word_y_offset = int(random.uniform(-1, 1) * params["word_position"])
                color_variation = int(random.randint(-3, 3) * params["ink_color"])
                word_color = (
                    np.clip(ink_color[0] + color_variation, 0, 255),
                    np.clip(ink_color[1] + color_variation, 0, 255),
                    np.clip(ink_color[2] + color_variation, 0, 255),
                )
                word_width = draw.textlength(word, font=font)
                word_height = font_size * 1.2
                if params["word_angle"] > 0:
                    # Rotate the word on an oversized transparent canvas so the
                    # rotated glyphs are never clipped, then paste it back.
                    word_angle = random.uniform(-2, 2) * params["word_angle"]
                    diagonal = math.sqrt(word_width**2 + word_height**2)
                    padding = int(diagonal * 0.5)
                    temp_width = int(diagonal + 2 * padding)
                    temp_height = int(diagonal + 2 * padding)
                    txt_img = Image.new('RGBA', (temp_width, temp_height), (0, 0, 0, 0))
                    txt_d = ImageDraw.Draw(txt_img)
                    center_x = temp_width // 2 - word_width // 2
                    center_y = temp_height // 2 - word_height // 2
                    txt_d.text((center_x, center_y), word, font=font, fill=word_color + (255,))
                    rotated = txt_img.rotate(
                        word_angle, resample=Image.BICUBIC, expand=0, center=(temp_width//2, temp_height//2)
                    )
                    paste_x = int(x_word_position + word_x_offset - padding)
                    paste_y = int(y_line_position + word_y_offset - padding)
                    # Use the rotated image as its own alpha mask.
                    img.paste(rotated, (paste_x, paste_y), rotated)
                else:
                    draw.text(
                        (x_word_position + word_x_offset, y_line_position + word_y_offset),
                        word, fill=word_color, font=font
                    )
                x_word_position += word_width + space_width
            # Jitter the line spacing around the nominal 1.2x font size.
            line_spacing_factor = 1.0 + (random.uniform(-0.1, 0.1) * params["line_spacing"])
            y_position += int(font_size * 1.2 * line_spacing_factor)

        if output_path is not None:
            img.save(output_path)
            logger.info(f"Saved rendered Sanskrit to {output_path}")

        return img

    except Exception as e:
        logger.error(f"Error rendering text with font {font_path}: {e}")
        return None
112
+
backend/app/services/synthetic/transformations.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Transformations module for geometric transformations and post-processing effects
3
+ """
4
+
5
+ import cv2
6
+ import os
7
+ import random
8
+ import logging
9
+ import itertools
10
+ from math import pi
11
+ from typing import List, Dict
12
+ import numpy as np
13
+ from PIL import Image, ImageEnhance, ImageFilter
14
+ from .effects import AdvancedImageEffects, generate_random_fold_lines, safe_apply_effect
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def cylindrical_edge_warp(pil_img: Image.Image, side: str = "left", strength: float = 0.6, warp_portion: float = 0.45) -> Image.Image:
    """Bend one vertical edge of the image as if wrapped around a cylinder.

    A strip of width ``warp_portion * width`` along the chosen edge is
    remapped using a cylinder of radius ``strip_width / strength``:
    horizontal positions are foreshortened (arc projection) and rows are
    scaled vertically toward the page midline. On failure the original
    image is returned unchanged.

    Args:
        pil_img: Input RGB PIL image.
        side: "left" warps the left strip; anything else warps the right.
        strength: Curvature; larger magnitude bends more (0 disables).
        warp_portion: Fraction of the width affected by the warp.
    """
    try:
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        h, w = img.shape[:2]
        W = int(warp_portion * w)  # width of the warped strip, in pixels
        R = W / strength if strength != 0 else 1e9  # cylinder radius; huge radius => no-op
        X, Y = np.meshgrid(np.arange(w), np.arange(h))
        map_x = X.astype(np.float32).copy()
        map_y = Y.astype(np.float32).copy()
        if side == "left":
            strip = X < W
            dx = W - X[strip]  # distance into the strip from its inner boundary
        else:
            strip = X > (w - W)
            dx = X[strip] - (w - W)
        theta = dx / R  # angle subtended on the cylinder surface
        # Horizontal foreshortening: the arc position R*sin(theta) replaces dx.
        displacement = R * np.sin(theta) - dx
        map_x[strip] += displacement
        # Vertical compression grows with theta, mimicking the strip tilting away.
        scale_y = np.cos(theta)
        map_y[strip] = (Y[strip] - h/2) / scale_y + h/2
        warped = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return Image.fromarray(cv2.cvtColor(warped, cv2.COLOR_BGR2RGB))
    except Exception as e:
        logger.error(f"Error in cylindrical warp: {e}")
        return pil_img
44
+
45
+
46
def washboard_warp(pil_img: Image.Image, amplitude: float = 8, wavelength: float = 120, phase: float = 0.0, decay_from_top: bool = True) -> Image.Image:
    """Displace rows along a horizontal sine wave ("washboard" paper ripple).

    Each column is shifted vertically by ``amplitude * sin(2*pi*x/wavelength
    + phase)``; when *decay_from_top* is True the shift fades linearly from
    full strength at the top row to 20% at the bottom. Returns the input
    unchanged on any failure.
    """
    try:
        bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        rows, cols = bgr.shape[:2]
        col_idx = np.arange(cols, dtype=np.float32)
        offsets = amplitude * np.sin(2*pi*col_idx / wavelength + phase)
        attenuation = (
            np.linspace(1, 0.2, rows, dtype=np.float32)[:, None]
            if decay_from_top
            else 1.0
        )
        grid_x, grid_y = np.meshgrid(col_idx, np.arange(rows, dtype=np.float32))
        grid_y += offsets * attenuation
        rippled = cv2.remap(bgr, grid_x, grid_y, cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return Image.fromarray(cv2.cvtColor(rippled, cv2.COLOR_BGR2RGB))
    except Exception as e:
        logger.error(f"Error in washboard warp: {e}")
        return pil_img
63
+
64
+
65
def apply_enhanced_postprocessing(original_image: Image.Image, output_dir: str, base_filename: str, params: Dict) -> List[Image.Image]:
    """Apply a randomized set of degradations to *original_image*.

    Builds a pool of candidate transforms (photometric jitter, geometric
    warps, and — gated by probabilities in *params* — advanced paper/scan
    effects), then applies either all of them individually or a random
    sample of up to five. When more than one transform is selected, a final
    image chaining all of them in sequence is also produced. Images are
    saved to *output_dir* when it is not None.

    Returns:
        ``[original_image]`` followed by one image per applied transform,
        plus (when >1 transform was selected) one combined image — so the
        last element is the most heavily transformed variant.
    """
    all_images = [original_image]
    transforms = []

    def rotate_image(img, angle):
        # Fill corners exposed by the rotation with the image's mean colour
        # instead of black.
        bg_color = tuple(np.array(img).mean(axis=(0, 1)).astype(int))
        return img.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=bg_color)

    def adjust_brightness(img, factor):
        enhancer = ImageEnhance.Brightness(img)
        return enhancer.enhance(factor)

    def adjust_contrast(img, factor):
        enhancer = ImageEnhance.Contrast(img)
        return enhancer.enhance(factor)

    def add_noise(img, intensity):
        # Additive Gaussian noise, clipped back into valid 8-bit range.
        img_array = np.array(img).astype(np.float32)
        noise = np.random.normal(0, intensity * 255, img_array.shape)
        noisy_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_array)

    def blur_image(img, radius):
        return img.filter(ImageFilter.GaussianBlur(radius=radius))

    # Baseline photometric jitter, each parameterized from *params*.
    transforms.append(("rotate", lambda img: rotate_image(img, random.uniform(-params["rotation_max"], params["rotation_max"]))))
    transforms.append(("brightness", lambda img: adjust_brightness(img, random.uniform(1.0-params["brightness_var"], 1.0+params["brightness_var"]))))
    transforms.append(("contrast", lambda img: adjust_contrast(img, random.uniform(1.0-params["contrast_var"], 1.0+params["contrast_var"]))))
    transforms.append(("noise", lambda img: add_noise(img, random.uniform(params["noise_min"], params["noise_max"]))))
    transforms.append(("blur", lambda img: blur_image(img, random.uniform(params["blur_min"], params["blur_max"]))))

    # Geometric page warps (always candidates).
    transforms.append(("washboard", lambda img: washboard_warp(img, amplitude=random.uniform(6, 12), wavelength=random.uniform(90, 150), phase=random.uniform(0, 2*pi), decay_from_top=random.choice([True, False]))))
    transforms.append(("cylinder", lambda img: cylindrical_edge_warp(img, side=random.choice(["left", "right"]), strength=random.uniform(0.4, 0.8) * random.choice([1, -1]), warp_portion=random.uniform(0.35, 0.5))))

    # Advanced effects are each added to the pool only with some probability,
    # so different calls degrade the page in different ways.
    if params.get('enable_advanced_effects', True):
        if random.random() < params.get('fold_probability', 0.4):
            transforms.append(("fold_crease", lambda img: Image.fromarray(
                AdvancedImageEffects.simulate_fold_crease(np.array(img), generate_random_fold_lines(img.size), params.get("fold_intensity", 0.3))
            )))
        if random.random() < params.get('advanced_effect_probability', 0.7):
            transforms.append(("ink_bleed", lambda img: Image.fromarray(
                AdvancedImageEffects.simulate_ink_bleed(np.array(img), params.get("bleed_intensity", 0.3), params.get("bleed_radius", 3))
            )))
        if random.random() < params.get('perspective_probability', 0.5):
            transforms.append(("perspective", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_perspective_distortion(np.array(img), params.get("corner_displacement", 20))
            )))
        if random.random() < params.get('shadow_probability', 0.6):
            transforms.append(("shadow_cast", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_shadow_effects(np.array(img), params.get("shadow_angle", 45), params.get("shadow_intensity", 0.4))
            )))
        if random.random() < params.get('advanced_effect_probability', 0.7):
            transforms.append(("morphological", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_morphological_operations(np.array(img), params.get("morph_operation", "mixed"), params.get("morph_kernel_size", 3))
            )))
        if params.get('scanner_artifacts', True) and random.random() < 0.3:
            transforms.append(("scanner_artifacts", lambda img: Image.fromarray(
                AdvancedImageEffects.simulate_scanner_artifacts(np.array(img), params.get("compression_quality", 85))
            )))
        if random.random() < 0.3:
            transforms.append(("lens_distortion", lambda img: Image.fromarray(
                AdvancedImageEffects.apply_lens_distortion(np.array(img), params.get("lens_distortion_strength", 0.2))
            )))

    # Either exercise the full pool or a random sample of at most five.
    if params["all_transforms"]:
        selected_transforms = transforms
    else:
        n_transforms = random.randint(1, min(5, len(transforms)))
        selected_transforms = random.sample(transforms, n_transforms)

    # Apply each selected transform independently to the ORIGINAL image.
    for transform_name, transform_func in selected_transforms:
        try:
            transformed_img = safe_apply_effect(transform_func, original_image, transform_name)
            if output_dir:
                transformed_filename = f"{base_filename}_{transform_name}.png"
                transformed_path = os.path.join(output_dir, transformed_filename)
                transformed_img.save(transformed_path)
                logger.info(f"Saved transformed image to {transformed_path}")
            all_images.append(transformed_img)
        except Exception as e:
            logger.error(f"Error applying transform {transform_name}: {e}")

    # Additionally chain all selected transforms into one combined image.
    if len(selected_transforms) > 1:
        try:
            combined_img = original_image.copy()
            for _, transform_func in selected_transforms:
                combined_img = safe_apply_effect(transform_func, combined_img, "combined")
            if output_dir:
                combined_path = os.path.join(output_dir, f"{base_filename}_combined.png")
                combined_img.save(combined_path)
                logger.info(f"Saved combined transformation to {combined_path}")
            all_images.append(combined_img)
        except Exception as e:
            logger.error(f"Error creating combined transformation: {e}")

    return all_images
161
+
162
+
163
def create_comprehensive_effect_combinations():
    """Enumerate the effect combinations used for systematic augmentation.

    Returns a list of effect-name lists covering: every effect alone, all
    advanced-effect pairs and triples, every geometric/advanced pairing,
    and three larger advanced groupings (first four, last three, all seven).
    """
    base_effects = ["rotate", "brightness", "contrast", "noise", "blur"]
    geometric_effects = ["washboard", "cylinder"]
    advanced_effects = [
        "fold_crease",
        "ink_bleed",
        "perspective",
        "shadow_cast",
        "morphological",
        "scanner_artifacts",
        "lens_distortion",
    ]
    combos = [[name] for name in base_effects + geometric_effects + advanced_effects]
    combos.extend(list(pair) for pair in itertools.combinations(advanced_effects, 2))
    combos.extend([geo, adv] for geo in geometric_effects for adv in advanced_effects)
    combos.extend(list(triple) for triple in itertools.combinations(advanced_effects, 3))
    combos.append(advanced_effects[:4])
    combos.append(advanced_effects[4:])
    combos.append(advanced_effects)
    return combos
189
+
190
+
191
def apply_systematic_postprocessing(original_image: Image.Image, output_dir: str, base_filename: str, params: Dict, effect_combination: List[str] = None) -> List[Image.Image]:
    """Apply baseline jitter plus a fixed, named *effect_combination* in order.

    Unlike ``apply_enhanced_postprocessing`` this variant is deterministic in
    WHICH effects run: the five baseline effects always run first (rotate,
    brightness, contrast, noise, blur), then each named effect from
    *effect_combination* is chained onto the result. Effect parameters are
    still sampled randomly per call. The result is saved to *output_dir*
    when given, named after the joined combination.

    Returns:
        ``[original_image, final_image]`` — the untouched input and the
        fully chained result.
    """
    all_images = [original_image]

    def rotate_image(img, angle):
        # Fill corners exposed by rotation with the image's mean colour.
        bg_color = tuple(np.array(img).mean(axis=(0, 1)).astype(int))
        return img.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=bg_color)

    def adjust_brightness(img, factor):
        enhancer = ImageEnhance.Brightness(img)
        return enhancer.enhance(factor)

    def adjust_contrast(img, factor):
        enhancer = ImageEnhance.Contrast(img)
        return enhancer.enhance(factor)

    def add_noise(img, intensity):
        # Additive Gaussian noise, clipped back into valid 8-bit range.
        img_array = np.array(img).astype(np.float32)
        noise = np.random.normal(0, intensity * 255, img_array.shape)
        noisy_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_array)

    def blur_image(img, radius):
        return img.filter(ImageFilter.GaussianBlur(radius=radius))

    # Name -> callable dispatch table; parameters are sampled at call time.
    transforms = {
        "rotate": lambda img: rotate_image(img, random.uniform(-params["rotation_max"], params["rotation_max"])),
        "brightness": lambda img: adjust_brightness(img, random.uniform(1.0-params["brightness_var"], 1.0+params["brightness_var"])),
        "contrast": lambda img: adjust_contrast(img, random.uniform(1.0-params["contrast_var"], 1.0+params["contrast_var"])),
        "noise": lambda img: add_noise(img, random.uniform(params["noise_min"], params["noise_max"])),
        "blur": lambda img: blur_image(img, random.uniform(params["blur_min"], params["blur_max"])),
        "washboard": lambda img: washboard_warp(img, amplitude=random.uniform(6, 12), wavelength=random.uniform(90, 150), phase=random.uniform(0, 2*pi), decay_from_top=random.choice([True, False])),
        "cylinder": lambda img: cylindrical_edge_warp(img, side=random.choice(["left", "right"]), strength=random.uniform(0.4, 0.8) * random.choice([1, -1]), warp_portion=random.uniform(0.35, 0.5)),
        "fold_crease": lambda img: Image.fromarray(AdvancedImageEffects.simulate_fold_crease(np.array(img), generate_random_fold_lines(img.size), params.get("fold_intensity", 0.3))),
        "ink_bleed": lambda img: Image.fromarray(AdvancedImageEffects.simulate_ink_bleed(np.array(img), params.get("bleed_intensity", 0.3), params.get("bleed_radius", 3))),
        "perspective": lambda img: Image.fromarray(AdvancedImageEffects.apply_perspective_distortion(np.array(img), params.get("corner_displacement", 20))),
        "shadow_cast": lambda img: Image.fromarray(AdvancedImageEffects.apply_shadow_effects(np.array(img), params.get("shadow_angle", 45), params.get("shadow_intensity", 0.4))),
        "morphological": lambda img: Image.fromarray(AdvancedImageEffects.apply_morphological_operations(np.array(img), params.get("morph_operation", "mixed"), params.get("morph_kernel_size", 3))),
        "scanner_artifacts": lambda img: Image.fromarray(AdvancedImageEffects.simulate_scanner_artifacts(np.array(img), params.get("compression_quality", 85))),
        "lens_distortion": lambda img: Image.fromarray(AdvancedImageEffects.apply_lens_distortion(np.array(img), params.get("lens_distortion_strength", 0.2))),
    }

    # Baseline effects always run, chained in this fixed order.
    current_image = original_image
    for effect_name in ["rotate", "brightness", "contrast", "noise", "blur"]:
        current_image = safe_apply_effect(transforms[effect_name], current_image, effect_name)

    # Then chain the requested named effects; unknown names are silently skipped.
    if effect_combination:
        for effect_name in effect_combination:
            if effect_name in transforms:
                current_image = safe_apply_effect(transforms[effect_name], current_image, effect_name)
        if output_dir:
            combo_name = "_".join(effect_combination)
            filename = f"{base_filename}_{combo_name}.png"
            filepath = os.path.join(output_dir, filename)
            current_image.save(filepath)
            logger.info(f"Saved combination image: {filepath}")

    all_images.append(current_image)
    return all_images
249
+
backend/requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ pydantic
5
+ google-api-core>=2.0.0
6
+ google-generativeai
7
+
8
+ # Image/text generation stack
9
+ opencv-python
10
+ pillow
11
+ numpy
12
+ scipy
13
+ noise
14
+ pandas
15
+ requests
16
+ matplotlib
17
+ datasets
content/static/NotoSansOriya-Black (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33fbf1d96373a315468ba4087645cac7fbf3b7f0da9cc5a7fb8d6bbc79f7e3
3
+ size 142472
content/static/NotoSansOriya-Black.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33fbf1d96373a315468ba4087645cac7fbf3b7f0da9cc5a7fb8d6bbc79f7e3
3
+ size 142472
content/static/NotoSansOriya-Bold (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904cd4edafaa595e5ba0e76094503b5f9a07967360c32013de63b7b9318e45f
3
+ size 155216
content/static/NotoSansOriya-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904cd4edafaa595e5ba0e76094503b5f9a07967360c32013de63b7b9318e45f
3
+ size 155216
content/static/NotoSansOriya-ExtraBold (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1f187e6c30d6931ffbee9b144b1396568d68273542b24f80844bfa12a408d4
3
+ size 142676
content/static/NotoSansOriya-ExtraBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1f187e6c30d6931ffbee9b144b1396568d68273542b24f80844bfa12a408d4
3
+ size 142676
content/static/NotoSansOriya-ExtraLight (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82202255f5b0ceee121dd00ed9fdccf03066283e5774a2e256fdb21c88292bc
3
+ size 155120
content/static/NotoSansOriya-ExtraLight.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82202255f5b0ceee121dd00ed9fdccf03066283e5774a2e256fdb21c88292bc
3
+ size 155120
content/static/NotoSansOriya-Light (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffbf720886730b128dc4d0fb333d7741b22ca36a56d989cdfc7e90002249f88d
3
+ size 155164
content/static/NotoSansOriya-Light.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffbf720886730b128dc4d0fb333d7741b22ca36a56d989cdfc7e90002249f88d
3
+ size 155164
content/static/NotoSansOriya-Medium (2).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0457c076cdb02f963737648a248ff74c3f3ac29aca2d2e5c3266b9d163fb22
3
+ size 155148
content/static/NotoSansOriya-Medium.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0457c076cdb02f963737648a248ff74c3f3ac29aca2d2e5c3266b9d163fb22
3
+ size 155148
content/static/NotoSansOriya-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5d16377ee01703170468402ad02d15595c77150bf62b4c31c66d3e79ad58039
3
+ size 154960
content/static/NotoSansOriya-SemiBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e86666b102c911434003af8b93442f2136f19fceab56ba940473a76c28d6801
3
+ size 155176
content/static/NotoSansOriya-Thin.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0fc04ba3330cca8d95d644a78829ad32195488df574aae4b18e09743023409
3
+ size 154372
content/static/NotoSansOriya_Condensed-Black.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2f25042a7500ecbb04b5b4672a7e6549b3d837a44fe9799012b82f1740709a
3
+ size 142696
content/static/NotoSansOriya_Condensed-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9d953ba9a0055476c14a688073ed4fd91ebfd0bdaad9c83ff2392a30463d17
3
+ size 155080
content/static/NotoSansOriya_Condensed-ExtraBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cd61a8ae71154ee5076fd872854a60e2468c9777fe4b62b391ebd080ff9931
3
+ size 142852
content/static/NotoSansOriya_Condensed-ExtraLight.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8275fdea5c4a89380e5139d3cfafa43cc6f9f38d98cc4dc19a0386382f3ec65f
3
+ size 154960
content/static/NotoSansOriya_Condensed-Light.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6e58df83c60b8aaff9652f8c1e728f6db5fd559d0c4ef3ba940d32c097ed32
3
+ size 154964
content/static/NotoSansOriya_Condensed-Medium.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f8e25c863a076f16b414027d92a36b3c8ae879421548013e8bfdc214671bc1b
3
+ size 155092
content/static/NotoSansOriya_Condensed-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac6263cb259109278a7a4f94ed5dffa6f5b68755304fe1812912eff7d31dc332
3
+ size 154976
content/static/NotoSansOriya_Condensed-SemiBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e563557bc6c181cf446eaa1ddf40ec3e7f06a22085a8d1833cf6e1499c7ef9
3
+ size 155152
content/static/NotoSansOriya_Condensed-Thin.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82faf95ee90bd0b5cc56faba1fa888ae30b9984983b00c77b6c0157ba1df35d5
3
+ size 154340
content/static/NotoSansOriya_ExtraCondensed-Black.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab791dee9aeb1cd0bb66d98f3387e51ef2c279863c9af207a6048f34fa433e9
3
+ size 142288
content/static/NotoSansOriya_ExtraCondensed-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8ec663c3c038aa2e0e8f9c4cb6530d8b390d6610bb47f8064abdc8c0f00ebe
3
+ size 154828
content/static/NotoSansOriya_ExtraCondensed-ExtraBold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536dcb3da1f946d0ba445327b4f3dc91762ebdfa71dd3edfe8fca8fba60171d7
3
+ size 142396