Spaces:

Really-amin
/

PersianOCR

Running

App Files Files Community

Really-amin committed on Feb 23

Commit

647aae6

verified ·

1 Parent(s): 64bb3c8

Delete app/persian_ocr/app.py

Browse files

Files changed (1) hide show

app/persian_ocr/app.py +0 -998

app/persian_ocr/app.py DELETED Viewed

@@ -1,998 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import os
-import subprocess
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-import tensorflow as tf
-tf.get_logger().setLevel('ERROR')
-import logging
-from logging.handlers import RotatingFileHandler
-import pytesseract
-import easyocr
-import cv2
-import numpy as np
-from PIL import Image
-import re
-from typing import Dict, List, Optional, Any, Union, Tuple
-from hazm import Normalizer, word_tokenize
-import json
-from datetime import datetime
-from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
-import gradio as gr
-import fitz
-from tqdm import tqdm
-from dataclasses import dataclass
-from functools import lru_cache
-import threading
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import tempfile
-import shutil
-from pathlib import Path
-import hashlib
-import pickle
-from collections import OrderedDict
-import time
-import torch
-import psutil
-import warnings
-import io
-warnings.filterwarnings('ignore')
-from collections import namedtuple
-# --- System constants ---
-# Path of the Tesseract executable and of its trained-data directory.
-TESSERACT_CMD = '/usr/bin/tesseract'
-TESSDATA_PREFIX = '/usr/share/tesseract-ocr/4.00/tessdata'
-# Status glyphs.
-SUCCESS = "✅"
-FAILURE = "❌"
-PROCESSING = "🔄"
-# --- Install system dependencies for Hugging Face Spaces ---
-def setup_system_dependencies():
-    """Install the OS packages used by the OCR stack and configure pytesseract.
-
-    Tesseract and its Persian language pack are installed only when the
-    binary at ``TESSERACT_CMD`` is missing; ``libopencv-dev`` and
-    ``fontconfig`` are installed on every call.
-
-    Raises:
-        Exception: any failure is logged and re-raised unchanged.
-    """
-    logger.info("START: نصب وابستگی‌های سیستمی برای Hugging Face Spaces")
-    try:
-        pending = []
-        # Tesseract plus the Persian traineddata, only if the binary is absent.
-        if not os.path.isfile(TESSERACT_CMD):
-            logger.info("PROCESSING: نصب Tesseract OCR")
-            pending.append(['apt-get', 'update'])
-            for package in ('tesseract-ocr', 'tesseract-ocr-fas'):
-                pending.append(['apt-get', 'install', '-y', package])
-        # OpenCV development files and Fontconfig are always (re)installed.
-        for package in ('libopencv-dev', 'fontconfig'):
-            pending.append(['apt-get', 'install', '-y', package])
-        for command in pending:
-            subprocess.run(command, check=True)
-        # Point pytesseract at the binary and its data directory.
-        pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
-        os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX
-        logger.info("SUCCESS: وابستگی‌های سیستمی نصب شدند")
-    except Exception as e:
-        logger.error(f"FAILURE: خطا در نصب وابستگی‌های سیستمی: {str(e)}")
-        raise
-# --- Persian-OCR functions (from detect.py) ---
-def get_grayscale(image):
-    """Return a single-channel version of *image*.
-
-    Args:
-        image: numpy array with at least two dimensions. Three-channel
-            input is treated as BGR and four-channel input as BGRA.
-
-    Returns:
-        A 2-D array, or None when *image* is None, not an ndarray, empty,
-        or has fewer than two dimensions. Input that is already 2-D is
-        returned unchanged.
-    """
-    if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
-        return None
-    # Already grayscale: cv2.cvtColor(..., COLOR_BGR2GRAY) would raise here.
-    if image.ndim == 2:
-        return image
-    channels = image.shape[2]
-    if channels == 1:
-        return image[:, :, 0]
-    if channels == 4:
-        return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
-    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-def remove_noise(image):
-    """Apply a median blur with aperture 3; return None for unusable input."""
-    usable = isinstance(image, np.ndarray) and image.size != 0 and len(image.shape) >= 2
-    if not usable:
-        return None
-    return cv2.medianBlur(image, 3)
-def thresholding(image):
-    """Binarize *image* with a fixed threshold of 160 (max value 255).
-
-    Returns:
-        The thresholded array, or None for unusable input. The invalid-input
-        path used to return ``(None, None)`` while the success path returns
-        a single array; it now returns a single None like the sibling
-        helpers do.
-    """
-    if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
-        return None
-    return cv2.threshold(image, 160, 255, cv2.THRESH_BINARY)[1]
-def dilate(image):
-    """Dilate *image* once with a 5x5 all-ones kernel; None for unusable input."""
-    if not isinstance(image, np.ndarray):
-        return None
-    if image.size == 0 or len(image.shape) < 2:
-        return None
-    structuring = np.ones((5, 5), np.uint8)
-    return cv2.dilate(image, structuring, iterations=1)
-def erode(image):
-    """Erode *image* once with a 5x5 all-ones kernel; None for unusable input."""
-    is_array = isinstance(image, np.ndarray)
-    if not is_array or image.size == 0 or len(image.shape) < 2:
-        return None
-    return cv2.erode(image, np.ones((5, 5), np.uint8), iterations=1)
-def opening(image):
-    """Morphologically open *image* with a 5x5 all-ones kernel; None for unusable input."""
-    if isinstance(image, np.ndarray) and image.size != 0 and len(image.shape) >= 2:
-        square = np.ones((5, 5), np.uint8)
-        return cv2.morphologyEx(image, cv2.MORPH_OPEN, square)
-    return None
-def canny(image):
-    """Run Canny edge detection with thresholds 100 and 200; None for unusable input."""
-    if not isinstance(image, np.ndarray):
-        return None
-    too_small = image.size == 0 or len(image.shape) < 2
-    return None if too_small else cv2.Canny(image, 100, 200)
-def deskew(image):
-    """Rotate *image* by the angle of the minimum-area rectangle around its non-zero pixels.
-
-    Returns:
-        None for unusable input, *image* itself when it has no non-zero
-        pixels, otherwise a rotated copy with the same width and height.
-    """
-    if not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
-        return None
-    points = np.column_stack(np.where(image > 0))
-    if points.size == 0:
-        return image
-    raw_angle = cv2.minAreaRect(points)[-1]
-    # NOTE(review): the -45 test assumes minAreaRect reports angles in
-    # [-90, 0); confirm against the installed OpenCV version.
-    rotation = -(90 + raw_angle) if raw_angle < -45 else -raw_angle
-    height, width = image.shape[:2]
-    pivot = (width // 2, height // 2)
-    matrix = cv2.getRotationMatrix2D(pivot, rotation, 1.0)
-    return cv2.warpAffine(image, matrix, (width, height), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
-def match_template(image, template):
-    """Return the TM_CCOEFF_NORMED match map of *template* over *image*, or None if either is not an ndarray."""
-    for candidate in (image, template):
-        if not isinstance(candidate, np.ndarray):
-            return None
-    return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
-def persian_ocr_main(image: np.ndarray, langs="fa", mode="tn"):
-    """Run Tesseract on *image* after rescaling it to a width of 1024 px.
-
-    Args:
-        image: numpy image array; it is written to disk with cv2.imwrite.
-        langs: "fa" (Persian), "en" (English) or "faen" (both).
-        mode: only used when langs == "fa": "t" blacklists digits and a few
-            symbols, "tn" whitelists letters, digits and punctuation,
-            "table" whitelists letters and digits.
-
-    Returns:
-        The recognised text, or "" when the image is unusable or the
-        rescaled image cannot be read back.
-
-    Raises:
-        ValueError: for an unknown *langs* value or, with langs == "fa", an
-            unknown *mode* (previously an UnboundLocalError).
-    """
-    if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
-        return ""
-    # Resolve the Tesseract options before touching the disk so that bad
-    # arguments cannot leave temporary files behind.
-    # The original digit run appears to lack U+06F1 (Persian digit one);
-    # appending it is harmless if it is already present.
-    digits = "۰١۲۳۴۵۶۷۸۹" + "\u06f1"
-    letters = "آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهی"
-    if langs == "fa":
-        fa_configs = {
-            "t": f'-l fas --psm 6 -c tessedit_char_blacklist="{digits}«»1234567890#"',
-            "tn": f'-l fas --psm 6 -c tessedit_char_whitelist="{letters} {digits}.?!,،:;/"',
-            "table": f'-l fas --psm 6 -c tessedit_char_whitelist="{letters}{digits}"',
-        }
-        if mode not in fa_configs:
-            raise ValueError("Choose valid mode options.")
-        custom_config = fa_configs[mode]
-    elif langs == "en":
-        custom_config = r'-l eng --psm 6'
-    elif langs == "faen":
-        custom_config = r'-l fas+eng --psm 6'
-    else:
-        raise ValueError("Choose valid language options.")
-    # A temporary directory removes every intermediate file on all exit
-    # paths, including exceptions raised by OpenCV, PIL or Tesseract.
-    with tempfile.TemporaryDirectory() as work_dir:
-        input_path = os.path.join(work_dir, "input.png")
-        upscaled_path = os.path.join(work_dir, "input_Upscaled.png")
-        cv2.imwrite(input_path, image)
-        with Image.open(input_path) as im:
-            length_x, width_y = im.size
-            factor = float(1024.0 / length_x)
-            # Keep at least one pixel of height for extremely wide inputs.
-            size = int(factor * length_x), max(1, int(factor * width_y))
-            im.resize(size, Image.Resampling.LANCZOS).save(upscaled_path, dpi=(300, 300))
-        gray = get_grayscale(cv2.imread(upscaled_path))
-        if gray is None:
-            return ""
-        # Only the grayscale step of the preprocessing chain is applied.
-        return pytesseract.image_to_string(gray, config=custom_config)
-# --- Logging configuration ---
-class CustomFormatter(logging.Formatter):
-    """Console formatter that colours records by level and decorates the message.
-
-    Each message gets a ``[Mem: x%]`` suffix with the current system memory
-    usage and is prefixed with an emoji for every STATUS_EMOJI keyword it
-    contains.
-    """
-    # ANSI colour escape sequences.
-    grey = "\x1b[38;21m"
-    blue = "\x1b[38;5;39m"
-    yellow = "\x1b[38;5;226m"
-    red = "\x1b[38;5;196m"
-    bold_red = "\x1b[31;1m"
-    reset = "\x1b[0m"
-    STATUS_EMOJI = {
-        'START': '🟦', 'SUCCESS': '✅', 'FAILURE': '❌', 'LOADING': '⏳',
-        'PROCESSING': '🔄', 'WARNING': '⚠️', 'MEMORY': '💾'
-    }
-    def __init__(self, fmt):
-        """Store *fmt* and build one coloured formatter per standard level."""
-        super().__init__()
-        self.fmt = fmt
-        self.FORMATS = {
-            logging.DEBUG: self.grey + self.fmt + self.reset,
-            logging.INFO: self.blue + self.fmt + self.reset,
-            logging.WARNING: self.yellow + self.fmt + self.reset,
-            logging.ERROR: self.red + self.fmt + self.reset,
-            logging.CRITICAL: self.bold_red + self.fmt + self.reset
-        }
-        # Built once here instead of on every format() call.
-        self._formatters = {level: logging.Formatter(level_fmt) for level, level_fmt in self.FORMATS.items()}
-    def format(self, record):
-        """Return the coloured, decorated text for *record* without modifying it."""
-        # Non-standard levels fall back to logging's default format.
-        formatter = self._formatters.get(record.levelno) or logging.Formatter(self.FORMATS.get(record.levelno))
-        # Interpolate args first: the literal '%' in the memory suffix must
-        # not take part in '%'-formatting of the message.
-        message = record.getMessage()
-        memory_usage = psutil.virtual_memory().percent
-        message = f"{message} [Mem: {memory_usage:.1f}%]"
-        for status, emoji in self.STATUS_EMOJI.items():
-            if status in message:
-                message = f"{emoji} {message}"
-        # Decorate a copy: the same record object is handed to every handler,
-        # so mutating it would leak the decoration into their output.
-        decorated = logging.makeLogRecord(record.__dict__)
-        decorated.msg = message
-        decorated.args = None
-        return formatter.format(decorated)
-# Module logger: DEBUG and above go to a rotating file and to the console.
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-# The log directory is created at import time.
-logs_dir = "/app/logs"
-os.makedirs(logs_dir, exist_ok=True)
-log_file = os.path.join(logs_dir, "ocr.log")
-# Rotate at 10 MiB, keeping five backups; plain (uncoloured) format on disk.
-file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
-file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-# The console uses the colouring CustomFormatter with the same layout.
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(CustomFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-logger.addHandler(file_handler)
-logger.addHandler(console_handler)
-# --- OCRResult record (implemented as a namedtuple, not a dataclass) ---
-# Instances are built by _format_result: `text` holds the non-numeric tokens,
-# `numbers` the numeric tokens, and `preprocessing_info` is set to an empty dict.
-OCRResult = namedtuple('OCRResult', ['text', 'numbers', 'confidence', 'model_name', 'processing_time',
-                                     'image_quality', 'detected_language', 'word_count', 'char_count',
-                                     'preprocessing_info', 'error_rate'])
-# --- Model management ---
-# Loaded model objects keyed by model name; filled lazily by load_model().
-models = {}
-# Per-model counters incremented by load_model(); process_image() orders the
-# models by success / (fail + 1).
-model_performance = {
-    'pretrained_model': {'success': 0, 'fail': 0},
-    'mT5_OCR_fa': {'success': 0, 'fail': 0},
-    'LayoutLMv3_fa': {'success': 0, 'fail': 0},
-    'easyocr': {'success': 0, 'fail': 0},
-    'tesseract': {'success': 0, 'fail': 0},
-    'persian_ocr': {'success': 0, 'fail': 0}
-}
-# Configuration for the models that need it: model identifier, loader type,
-# minimum confidence accepted by process_image(), and device.
-# 'easyocr' and 'tesseract' have no entry and are handled by name.
-model_configs = {
-    'pretrained_model': {'name': "beheshti-ai/TrOCR-fa", 'type': "transformer", 'threshold': 0.8, 'device': "cpu"},
-    'mT5_OCR_fa': {'name': "aleemeconomist/mT5-OCR-fa", 'type': "image-to-text", 'threshold': 0.7, 'device': "cpu"},
-    'LayoutLMv3_fa': {'name': "SoheilStar/LayoutLMv3-fa", 'type': "document-question-answering", 'threshold': 0.7, 'device': "cpu"},
-    'persian_ocr': {'name': "Persian-OCR", 'type': "custom", 'threshold': 0.75, 'device': "cpu"}
-}
-# Initial ordering of the models (ties in the performance sort keep this order).
-model_priority = ['pretrained_model', 'mT5_OCR_fa', 'LayoutLMv3_fa', 'easyocr', 'tesseract', 'persian_ocr']
-# Serialises model loading.
-model_lock = threading.Lock()
-# Shared hazm normalizer used by _format_result().
-normalizer = Normalizer()
-def load_model(model_name: str, progress=None):
-    """Load *model_name* into the module-level ``models`` cache if it is not there yet.
-
-    Args:
-        model_name: a key of ``model_configs``, or "easyocr" / "tesseract".
-        progress: optional callable taking a float in [0, 1].
-
-    Returns:
-        True when the model is available afterwards, False when loading
-        failed or the name / configured type is not recognised. Unknown
-        names used to be reported as loaded, or to raise KeyError from the
-        error handler.
-    """
-    with model_lock:
-        if model_name in models:
-            return True
-        config = model_configs.get(model_name)
-        if config is None and model_name not in ("easyocr", "tesseract"):
-            logger.error(f"FAILURE Unknown model: {model_name}")
-            if progress:
-                progress(1.0)
-            return False
-        # setdefault keeps the counters usable for names missing from the table.
-        stats = model_performance.setdefault(model_name, {'success': 0, 'fail': 0})
-        logger.info(f"START Loading model: {model_name}")
-        try:
-            if config:
-                if config['type'] in ("image-to-text", "document-question-answering"):
-                    models[model_name] = pipeline(config['type'], model=config['name'], device=config['device'])
-                elif config['type'] == "transformer":
-                    if progress:
-                        progress(0.3)
-                    processor = TrOCRProcessor.from_pretrained(config['name'])
-                    if progress:
-                        progress(0.6)
-                    model_instance = VisionEncoderDecoderModel.from_pretrained(config['name'])
-                    models[model_name] = {'processor': processor, 'model': model_instance, 'device': config['device']}
-                elif config['type'] == "custom" and model_name == "persian_ocr":
-                    models[model_name] = True  # nothing to load for this backend
-                else:
-                    # Previously fell through and was reported as a success.
-                    raise ValueError(f"Unsupported model type: {config['type']}")
-            elif model_name == "easyocr":
-                if progress:
-                    progress(0.5)
-                models[model_name] = easyocr.Reader(['fa', 'en'], gpu=torch.cuda.is_available())
-            else:  # "tesseract"
-                # Configure Tesseract for Hugging Face Spaces.
-                pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
-                os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX
-                models[model_name] = True
-            logger.info(f"SUCCESS Model {model_name} loaded")
-            stats['success'] += 1
-            if progress:
-                progress(1.0)
-            return True
-        except Exception as e:
-            logger.error(f"FAILURE Error loading {model_name}: {str(e)}")
-            stats['fail'] += 1
-            if progress:
-                progress(1.0)
-            return False
-def process_image(image: np.ndarray, progress=None):
-    """Run OCR on *image*, trying the available models until one is accepted.
-
-    Models are tried in descending order of success / (fail + 1) taken from
-    ``model_performance``. The first model that yields non-blank text with a
-    confidence at or above its threshold wins.
-
-    Args:
-        image: numpy image array.
-        progress: optional callable taking a float in [0, 1].
-
-    Returns:
-        An OCRResult, or None when no model produced an acceptable result.
-
-    Raises:
-        ValueError: if *image* is None, not an ndarray, empty, or has fewer
-            than two dimensions.
-    """
-    global models, model_performance, model_priority, model_configs
-    start_time = time.time()
-    if progress:
-        progress(0.0)
-    if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
-        logger.error("FAILURE Input image to process_image is invalid or empty")
-        raise ValueError("Input image is invalid or empty")
-    logger.debug(f"Processing image with shape: {image.shape}")
-    # Best-performing models first.
-    sorted_models_priority = sorted(
-        model_priority,
-        key=lambda x: model_performance[x]['success'] / (model_performance[x]['fail'] + 1),
-        reverse=True
-    )
-    for i, model_name in enumerate(sorted_models_priority):
-        try:
-            # Skip models that cannot be loaded.
-            if not load_model(model_name, progress):
-                continue
-            if progress:
-                progress((i + 1) / len(sorted_models_priority))
-            result_dict = None
-            config = model_configs.get(model_name)
-            # Configured models dispatch on their type; the others by name.
-            if model_name in model_configs:
-                if config['type'] == "transformer":
-                    result_dict = _process_transformer_model_full(image, model_name)
-                elif config['type'] == "image-to-text":
-                    result_dict = _process_transformer_model(image, model_name)
-                elif config['type'] == "document-question-answering":
-                    result_dict = _process_transformer_model(image, model_name)
-                elif model_name == 'persian_ocr':
-                    result_dict = _process_persian_ocr(image)
-            elif model_name == 'easyocr':
-                result_dict = _process_easyocr(image)
-            elif model_name == 'tesseract':
-                result_dict = _process_tesseract(image)
-            if result_dict and 'text' in result_dict and result_dict['text'].strip():
-                # The elapsed time is measured from the start of the call, so it
-                # includes the time spent on models tried earlier.
-                processing_time = time.time() - start_time
-                ocr_result = _format_result(
-                    result_dict['text'],
-                    result_dict.get('confidence', 0.5),
-                    model_name,
-                    processing_time
-                )
-                # Models without a config entry use a threshold of 0.5.
-                threshold = model_configs.get(model_name, {}).get('threshold', 0.5)
-                if ocr_result.confidence >= threshold:
-                    logger.info(f"SUCCESS Model {model_name} succeeded")
-                    if progress:
-                        progress(1.0)
-                    return ocr_result
-                # A result below the threshold is dropped without a log entry
-                # and the next model is tried.
-        except Exception as e:
-            # A failing model only produces a warning; the next one is tried.
-            logger.warning(f"WARNING Model {model_name} failed: {str(e)}")
-            continue
-    logger.warning("WARNING No model succeeded")
-    if progress:
-        progress(1.0)
-    return None
-def _process_transformer_model(image: np.ndarray, model_name: str):
-    """Run a Hugging Face pipeline model from ``models`` on *image*.
-
-    Args:
-        image: numpy image; 2-D or single-channel input is treated as
-            grayscale, four channels as BGRA, anything else as BGR.
-        model_name: key into ``models`` and ``model_configs``.
-
-    Returns:
-        A dict with 'text' and 'confidence'. The pipelines are not asked for
-        a score here, so the configured threshold is reported as confidence.
-    """
-    global models, model_configs
-    # cv2.COLOR_BGR2RGB raises on the 2-D grayscale arrays produced by the
-    # preprocessing step, so pick the conversion from the channel count.
-    if image.ndim == 2 or image.shape[2] == 1:
-        rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-    elif image.shape[2] == 4:
-        rgb = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
-    else:
-        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    pil_image = Image.fromarray(rgb)
-    model = models[model_name]
-    if model_configs[model_name]['type'] == "image-to-text":
-        result = model(pil_image)[0]
-        return {'text': result['generated_text'], 'confidence': model_configs[model_name]['threshold']}
-    else:  # "document-question-answering"
-        # NOTE(review): the pipeline is called without a question and the
-        # result is indexed as a dict; confirm this matches the pipeline API.
-        result = model(pil_image)
-        return {'text': result['answer'], 'confidence': model_configs[model_name]['threshold']}
-def _process_transformer_model_full(image: np.ndarray, model_name: str):
-    """Run the processor + encoder-decoder pair stored in ``models`` on *image*.
-
-    Args:
-        image: numpy image; 2-D or single-channel input is treated as
-            grayscale, four channels as BGRA, anything else as BGR.
-        model_name: key into ``models`` and ``model_configs``.
-
-    Returns:
-        A dict with 'text' (first decoded sequence) and 'confidence' (the
-        configured threshold, since no score is computed).
-    """
-    global models, model_configs
-    # cv2.COLOR_BGR2RGB raises on 2-D grayscale input, so pick the
-    # conversion from the channel count.
-    if image.ndim == 2 or image.shape[2] == 1:
-        rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-    elif image.shape[2] == 4:
-        rgb = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
-    else:
-        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    pil_image = Image.fromarray(rgb)
-    entry = models[model_name]
-    processor = entry['processor']
-    model = entry['model']
-    device = entry['device']
-    pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values.to(device)
-    # Inference only: skip autograd bookkeeping.
-    with torch.no_grad():
-        generated_ids = model.generate(pixel_values)
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return {'text': generated_text, 'confidence': model_configs[model_name]['threshold']}
-def _process_easyocr(image: np.ndarray):
-    """Read *image* with the cached EasyOCR reader.
-
-    Returns:
-        A dict whose 'text' joins the detected strings with spaces and whose
-        'confidence' is the mean detection score; empty text and 0 when
-        nothing is detected.
-    """
-    detections = models['easyocr'].readtext(image)
-    if not detections:
-        return {'text': '', 'confidence': 0}
-    pieces = []
-    score_total = 0
-    # Each detection is a (box, text, score) triple.
-    for _, piece, score in detections:
-        pieces.append(piece)
-        score_total = score_total + score
-    return {'text': ' '.join(pieces), 'confidence': score_total / len(detections)}
-def _process_tesseract(image: np.ndarray):
-    """OCR *image* with Tesseract (Persian + English) and report a fixed confidence of 0.5."""
-    tesseract_options = '--oem 3 --psm 6 -l fas+eng'
-    recognised = pytesseract.image_to_string(image, config=tesseract_options)
-    return {'text': recognised, 'confidence': 0.5}
-def _process_persian_ocr(image: np.ndarray):
-    """OCR *image* through persian_ocr_main in Persian "tn" mode.
-
-    Returns:
-        A dict with the text and a fixed confidence of 0.75; empty text and
-        confidence 0 for unusable input or when the OCR call raises.
-    """
-    empty_result = {'text': '', 'confidence': 0}
-    try:
-        usable = isinstance(image, np.ndarray) and image.size != 0 and len(image.shape) >= 2
-        if not usable:
-            return empty_result
-        return {'text': persian_ocr_main(image, langs="fa", mode="tn"), 'confidence': 0.75}
-    except Exception as e:
-        logger.error(f"FAILURE Persian-OCR processing failed: {str(e)}")
-        return empty_result
-def _format_result(text: str, confidence: float, model_name: str, processing_time: float):
-    """Normalise and tokenise *text* and wrap it, with derived statistics, in an OCRResult.
-
-    Tokens made only of ASCII/Persian digits (optionally with one decimal
-    separator) go to ``numbers``; all other tokens go to ``text``.
-    """
-    tokens = word_tokenize(normalizer.normalize(text))
-    persian_nums = '۰۱۲۳۴۵۶۷۸۹'
-    number_pattern = f'^[0-9{persian_nums}]+([\\.,،٫][0-9{persian_nums}]+)?$'
-    is_number = re.compile(number_pattern).match
-    numbers = []
-    text_list = []
-    # Single pass: route each token to the numeric or the textual list.
-    for token in tokens:
-        if is_number(token):
-            numbers.append(token)
-        else:
-            text_list.append(token)
-    return OCRResult(
-        text=text_list,
-        numbers=numbers,
-        confidence=confidence,
-        model_name=model_name,
-        processing_time=processing_time,
-        image_quality=_assess_quality(text_list),
-        detected_language=_detect_language(text_list),
-        word_count=len(text_list),
-        char_count=sum(map(len, text_list)),
-        preprocessing_info={},
-        error_rate=_estimate_error_rate(text_list, confidence)
-    )
-def _estimate_error_rate(text_list: List[str], confidence: float):
-    """Heuristic error estimate in [0, 1]; lower confidence and shorter words raise it.
-
-    Returns 1.0 for an empty word list.
-    """
-    if not text_list:
-        return 1.0
-    total_chars = sum(len(w) for w in text_list)
-    avg_word_length = total_chars / len(text_list)
-    estimate = 1.0 - confidence + (3 - avg_word_length) / 10
-    # Clamp to the [0, 1] interval.
-    return max(0.0, min(1.0, estimate))
-def _assess_quality(text_list: List[str]):
-    """Grade the recognised text as "High", "Medium" or "Low" from word count and mean word length."""
-    if not text_list:
-        return "Low"
-    word_count = len(text_list)
-    avg_word_length = sum(len(w) for w in text_list) / word_count
-    if word_count > 50 and avg_word_length > 3:
-        return "High"
-    if word_count > 20 and avg_word_length > 2:
-        return "Medium"
-    return "Low"
-def _detect_language(text_list: List[str]):
-    """Classify the words as "Persian", "English", "Mixed" (tie) or "Unknown" (no words).
-
-    Compares the number of characters in U+0600-U+06FF with the number of
-    ASCII letters.
-    """
-    if not text_list:
-        return "Unknown"
-    persian_chars = 0
-    english_chars = 0
-    for word in text_list:
-        persian_chars += len(re.findall(r'[\u0600-\u06FF]', word))
-        english_chars += len(re.findall(r'[a-zA-Z]', word))
-    if persian_chars > english_chars:
-        return "Persian"
-    if english_chars > persian_chars:
-        return "English"
-    return "Mixed"
-# --- ImagePreprocessor functions ---
-# Defaults for enhance_for_persian(); caller-supplied settings override them.
-# 'resize_scale' is a percentage (200 doubles width and height).
-default_preprocessing_settings = {
-    'resize': True, 'resize_scale': 200, 'enhance_contrast': True, 'reduce_noise': True,
-    'sharpen': True, 'deskew': True, 'threshold': True
-}
-def enhance_for_persian(image: np.ndarray, settings: Dict[str, Any], progress=None):
-    """Preprocess *image* for OCR.
-
-    Steps, in order: grayscale, resize, CLAHE contrast, bilateral noise
-    reduction, sharpening, deskew, adaptive threshold. Every step except
-    grayscale is switched by the matching key in *settings*.
-
-    Args:
-        image: numpy image; 2-D input is used as-is, three channels are
-            treated as BGR, four as BGRA, one as grayscale.
-        settings: overrides for ``default_preprocessing_settings``; None is
-            treated as an empty dict.
-        progress: optional callable taking a float in [0, 1].
-
-    Returns:
-        ``(processed, info)`` where *info* records the applied steps, or
-        ``(None, {})`` when the input is unusable or a step fails. A deskew
-        failure is only logged and the remaining steps still run.
-    """
-    info = {}
-    if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
-        logger.error("FAILURE Input image to enhance_for_persian is invalid or empty")
-        return None, {}
-    logger.debug(f"Enhancing image with shape: {image.shape}")
-    processed = image.copy()
-    current_settings = default_preprocessing_settings.copy()
-    # Tolerate settings=None instead of raising AttributeError.
-    current_settings.update(settings or {})
-    total_steps = 7
-    def report(step):
-        # Forward the fraction of completed steps to the optional callback.
-        if progress:
-            progress(step / total_steps)
-    try:
-        report(0)
-        height, width = processed.shape[:2]
-        if height <= 0 or width <= 0:
-            logger.error(f"FAILURE Invalid image dimensions: height={height}, width={width}")
-            return None, {}
-        logger.debug(f"Image shape before grayscale: {processed.shape}")
-        if len(processed.shape) == 3:
-            try:
-                channels = processed.shape[2]
-                if channels == 1:
-                    # Single-channel 3-D input: drop the axis (BGR2GRAY would raise).
-                    processed = np.ascontiguousarray(processed[:, :, 0])
-                elif channels == 4:
-                    processed = cv2.cvtColor(processed, cv2.COLOR_BGRA2GRAY)
-                else:
-                    processed = cv2.cvtColor(processed, cv2.COLOR_BGR2GRAY)
-                info['grayscale'] = True
-            except cv2.error as e:
-                logger.error(f"FAILURE Grayscale conversion failed: {str(e)}")
-                return None, {}
-        report(1)
-        logger.debug(f"Image shape after grayscale: {processed.shape}")
-        if current_settings.get('resize'):
-            scale_percent = current_settings.get('resize_scale', 200)
-            if scale_percent != 100:
-                new_width = int(width * scale_percent / 100)
-                new_height = int(height * scale_percent / 100)
-                if new_width > 0 and new_height > 0:
-                    try:
-                        processed = cv2.resize(processed, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-                        info['resized'] = f"{scale_percent}%"
-                    except cv2.error as e:
-                        logger.error(f"FAILURE Resize failed: {str(e)}")
-                        return None, {}
-                else:
-                    logger.warning(f"WARNING Resize skipped due to invalid dimensions: width={new_width}, height={new_height}")
-        report(2)
-        logger.debug(f"Image shape after resize: {processed.shape}")
-        if current_settings.get('enhance_contrast'):
-            try:
-                clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
-                processed = clahe.apply(processed)
-                info['contrast_enhanced'] = True
-            except cv2.error as e:
-                logger.error(f"FAILURE Contrast enhancement failed: {str(e)}")
-                return None, {}
-        report(3)
-        logger.debug(f"Image shape after contrast: {processed.shape}")
-        if current_settings.get('reduce_noise'):
-            try:
-                processed = cv2.bilateralFilter(processed, 9, 75, 75)
-                info['noise_reduced'] = True
-            except cv2.error as e:
-                logger.error(f"FAILURE Noise reduction failed: {str(e)}")
-                return None, {}
-        report(4)
-        logger.debug(f"Image shape after noise reduction: {processed.shape}")
-        if current_settings.get('sharpen'):
-            try:
-                kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
-                processed = cv2.filter2D(processed, -1, kernel)
-                info['sharpened'] = True
-            except cv2.error as e:
-                logger.error(f"FAILURE Sharpening failed: {str(e)}")
-                return None, {}
-        report(5)
-        logger.debug(f"Image shape after sharpen: {processed.shape}")
-        if current_settings.get('deskew'):
-            try:
-                coords = np.column_stack(np.where(processed > 0))
-                if coords.size > 0:
-                    angle = cv2.minAreaRect(coords)[-1]
-                    # Fold the angle into [-45, 45]. A rectangle's orientation
-                    # repeats every 90 degrees, so this keeps the same
-                    # correction while avoiding a near-90-degree rotation when
-                    # OpenCV reports angles in (0, 90].
-                    if angle < -45:
-                        angle = 90 + angle
-                    elif angle > 45:
-                        angle = angle - 90
-                    center = (processed.shape[1] // 2, processed.shape[0] // 2)
-                    M = cv2.getRotationMatrix2D(center, angle, 1.0)
-                    processed = cv2.warpAffine(processed, M, (processed.shape[1], processed.shape[0]),
-                                              flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
-                    info['deskewed'] = f"angle: {angle:.2f}"
-                else:
-                    logger.warning("WARNING No contours found for deskewing")
-            except Exception as e:
-                # Deskew is best-effort: keep the un-rotated image.
-                logger.warning(f"WARNING Deskew failed: {e}")
-        report(6)
-        logger.debug(f"Image shape after deskew: {processed.shape}")
-        if current_settings.get('threshold'):
-            try:
-                processed = cv2.adaptiveThreshold(processed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                                  cv2.THRESH_BINARY, 11, 2)
-                info['thresholded'] = True
-            except cv2.error as e:
-                logger.error(f"FAILURE Thresholding failed: {str(e)}")
-                return None, {}
-        if progress:
-            progress(1.0)
-        logger.debug(f"Image shape after threshold: {processed.shape}")
-        return processed, info
-    except Exception as e:
-        logger.error(f"FAILURE Preprocessing error: {str(e)}")
-        if progress:
-            progress(1.0)
-        return None, {}
-def remove_background(image: np.ndarray):
-    """Mask *image* with an inverted Otsu threshold cleaned by a 3x3 morphological opening.
-
-    Returns:
-        The masked image, or None for unusable input or when OpenCV fails.
-    """
-    usable = isinstance(image, np.ndarray) and image.size != 0 and len(image.shape) >= 2
-    if not usable:
-        logger.error("FAILURE Input image for background removal is invalid or empty")
-        return None
-    try:
-        logger.debug(f"Removing background from image with shape: {image.shape}")
-        if len(image.shape) == 2:
-            gray = image
-        else:
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-        rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
-        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, rect_kernel, iterations=1)
-        return cv2.bitwise_and(image, image, mask=mask)
-    except Exception as e:
-        logger.error(f"FAILURE Background removal failed: {str(e)}")
-        return None
-# --- PDFProcessor functions ---
-# Scratch directory created at import time; removed by cleanup_pdf_temp_dir().
-pdf_temp_dir = Path(tempfile.mkdtemp())
-# Defaults for the PDF functions. Within the visible code only 'batch_size' is
-# read from this dict (by process_pdf_document); the other keys are looked up
-# in the caller-supplied settings instead.
-pdf_processing_settings = {
-    'dpi': 300, 'scale_factor': 2, 'split_pages': True, 'extract_images': True, 'batch_size': 2
-}
-def process_pdf_document(pdf_path: str, settings: Dict[str, Any], progress=None):
-    """Render and preprocess every page of a PDF.
-
-    Pages are processed one after another, in page order. The earlier
-    version shared one PyMuPDF document between worker threads (PyMuPDF
-    does not support multithreaded use) and collected results in completion
-    order. The 'batch_size' setting is therefore accepted but no longer used.
-
-    Args:
-        pdf_path: path of the PDF file.
-        settings: options forwarded to _process_pdf_page; None is treated as
-            an empty dict.
-        progress: optional callable taking a float in [0, 1].
-
-    Returns:
-        The list of preprocessed images, or [] when the file is missing or
-        processing fails.
-    """
-    logger.info(f"START Processing PDF: {pdf_path}")
-    if not os.path.exists(pdf_path):
-        logger.error(f"FAILURE PDF file not found: {pdf_path}")
-        return []
-    settings = settings or {}
-    all_images = []
-    try:
-        doc = fitz.open(pdf_path)
-        try:
-            total_pages = len(doc)
-            with tqdm(total=total_pages, desc="📄 Processing PDF") as pbar:
-                for page_num in range(total_pages):
-                    result = _process_pdf_page(doc, page_num, settings, progress)
-                    if result and isinstance(result, list):
-                        all_images.extend(result)
-                    pbar.update(1)
-        finally:
-            # Release the document even when a page fails.
-            doc.close()
-        logger.info(f"SUCCESS Extracted {len(all_images)} images")
-        return all_images
-    except Exception as e:
-        logger.error(f"FAILURE PDF processing failed: {str(e)}")
-        return []
-def _process_pdf_page(doc, page_num: int, settings: Dict[str, Any], progress=None):
-    """Render one PDF page, and optionally its embedded images, and preprocess them.
-
-    Args:
-        doc: open PyMuPDF document.
-        page_num: zero-based page index.
-        settings: 'scale_factor' (default 2) sets the render zoom and
-            'extract_images' (default True) enables embedded-image
-            extraction; the dict is also passed to enhance_for_persian.
-        progress: optional callable taking a float in [0, 1].
-
-    Returns:
-        A list of preprocessed images; [] when the page cannot be processed.
-    """
-    images = []
-    try:
-        page = doc.load_page(page_num)
-        scale = settings.get('scale_factor', 2)
-        pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))
-        if pix.n <= 0 or pix.width <= 0 or pix.height <= 0 or not pix.samples:
-            logger.error(f"FAILURE Invalid pixmap data for page {page_num + 1}")
-            return []
-        img_data = np.frombuffer(pix.samples, dtype=np.uint8)
-        expected_size = pix.width * pix.height * pix.n
-        if img_data.size != expected_size:
-            logger.error(f"FAILURE Pixmap data size mismatch for page {page_num + 1}: expected {expected_size}, got {img_data.size}")
-            return []
-        img = img_data.reshape(pix.height, pix.width, pix.n)
-        logger.debug(f"Image shape from pixmap: {img.shape}")
-        processed_img, _ = enhance_for_persian(img, settings, progress)
-        if processed_img is not None:
-            images.append(processed_img)
-        if settings.get('extract_images', True):
-            for img_info in page.get_images(full=True):
-                xref = img_info[0]
-                try:
-                    base_image = fitz.Pixmap(doc, xref)
-                    # Four or more colour components (e.g. CMYK): convert to RGB.
-                    if base_image.n - base_image.alpha >= 4:
-                        base_image = fitz.Pixmap(fitz.csRGB, base_image)
-                    if base_image.n <= 0 or base_image.width <= 0 or base_image.height <= 0 or not base_image.samples:
-                        logger.warning(f"WARNING Invalid extracted pixmap for page {page_num + 1}, skipping")
-                        continue
-                    # Reshape with the real component count, then drop alpha;
-                    # a hard-coded 3 or 1 fails for pixmaps that carry alpha.
-                    channels = base_image.n
-                    img_array = np.frombuffer(base_image.samples, dtype=np.uint8).reshape(
-                        base_image.height, base_image.width, channels)
-                    if base_image.alpha:
-                        img_array = img_array[:, :, :channels - 1]
-                    if img_array.shape[2] == 1:
-                        # Hand grayscale over as 2-D so the BGR->gray step is skipped.
-                        img_array = img_array[:, :, 0]
-                    elif img_array.shape[2] != 3:
-                        logger.warning(f"WARNING Invalid extracted pixmap for page {page_num + 1}, skipping")
-                        continue
-                    img_array = np.ascontiguousarray(img_array)
-                    # Ignore small embedded images such as icons.
-                    if img_array.shape[0] > 100 and img_array.shape[1] > 100:
-                        processed_img_extracted, _ = enhance_for_persian(img_array, settings, progress)
-                        if processed_img_extracted is not None:
-                            images.append(processed_img_extracted)
-                except Exception as e:
-                    logger.warning(f"WARNING Failed to process extracted image for page {page_num + 1}: {str(e)}")
-                    continue
-        if progress:
-            progress(1.0)
-        return images
-    except Exception as e:
-        logger.error(f"FAILURE Page {page_num + 1} processing failed: {str(e)}")
-        if progress:
-            progress(1.0)
-        return []
-def optimize_pdf_document(pdf_path: str, settings: Dict[str, Any], progress=None):
-    """Rebuild a PDF from preprocessed renderings of its pages.
-
-    Each page is rendered at 2x zoom, run through enhance_for_persian,
-    written as a temporary JPEG and appended to a new PDF saved as
-    ``/app/optimized_<original file name>``.
-
-    Args:
-        pdf_path: path of the source PDF.
-        settings: options forwarded to enhance_for_persian.
-        progress: optional callable taking a float in [0, 1].
-
-    Returns:
-        The path of the optimised PDF, or *pdf_path* unchanged when the
-        file is missing or optimisation fails.
-    """
-    logger.info(f"START Optimizing PDF: {pdf_path}")
-    logger.debug(f"PDF Path for optimization: {pdf_path}")
-    if not os.path.exists(pdf_path):
-        logger.error(f"FAILURE PDF file not found: {pdf_path}")
-        return pdf_path
-    try:
-        output_path = Path(f"/app/optimized_{Path(pdf_path).name}")
-        doc = fitz.open(pdf_path)
-        new_doc = fitz.open()
-        try:
-            total_pages = len(doc)
-            for page_num in tqdm(range(total_pages), desc="📄 Optimizing PDF"):
-                page = doc.load_page(page_num)
-                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
-                if pix.n <= 0 or pix.width <= 0 or pix.height <= 0 or not pix.samples:
-                    logger.error(f"FAILURE Invalid pixmap data for page {page_num + 1}")
-                    continue
-                img_data = np.frombuffer(pix.samples, dtype=np.uint8)
-                expected_size = pix.width * pix.height * pix.n
-                if img_data.size != expected_size:
-                    logger.error(f"FAILURE Pixmap data size mismatch for page {page_num + 1}: expected {expected_size}, got {img_data.size}")
-                    continue
-                img = img_data.reshape(pix.height, pix.width, pix.n)
-                logger.debug(f"Image shape from pixmap: {img.shape}")
-                processed_img, _ = enhance_for_persian(img, settings, progress)
-                if processed_img is None:
-                    continue
-                img_path = pdf_temp_dir / f"temp_page_{page_num}.jpg"
-                cv2.imwrite(str(img_path), processed_img)
-                try:
-                    # insert_pdf() needs a PDF source, so the image document
-                    # is converted to a one-page PDF first.
-                    image_doc = fitz.open(str(img_path))
-                    try:
-                        pdf_bytes = image_doc.convert_to_pdf()
-                    finally:
-                        image_doc.close()
-                    page_pdf = fitz.open("pdf", pdf_bytes)
-                    try:
-                        new_doc.insert_pdf(page_pdf)
-                    finally:
-                        page_pdf.close()
-                finally:
-                    # Remove the temporary JPEG even when insertion fails.
-                    if img_path.exists():
-                        os.remove(img_path)
-            new_doc.save(str(output_path))
-        finally:
-            new_doc.close()
-            doc.close()
-        logger.info(f"SUCCESS PDF optimized: {output_path}")
-        if progress:
-            progress(1.0)
-        return str(output_path)
-    except Exception as e:
-        logger.error(f"FAILURE PDF optimization failed: {str(e)}")
-        if progress:
-            progress(1.0)
-        return pdf_path
def cleanup_pdf_temp_dir():
    """Delete the PDF scratch directory tree.

    Any ``Exception`` raised while deleting is logged instead of propagated,
    so callers can treat this as a best-effort cleanup.
    """
    try:
        shutil.rmtree(pdf_temp_dir)
    except Exception as exc:
        logger.error(f"FAILURE Temp cleanup failed: {exc}")
# --- Cache functions ---
# Directory reserved for cache files; created by setup_cache_dir().
cache_dir_path = Path("/app/cache")
# Serialises every read/write of cache_data across worker threads.
cache_lock = threading.Lock()
# Upper bound on the number of entries kept in cache_data.
cache_max_size = 1000
# Image digest -> pickled result; the first entry is the next one evicted.
cache_data = OrderedDict()
def setup_cache_dir():
    """Create the directory named by ``cache_dir_path`` if it is missing.

    Missing parent directories are created as well, so the call also succeeds
    when nothing has created the parent folder beforehand. An already
    existing directory is left untouched.
    """
    cache_dir_path.mkdir(parents=True, exist_ok=True)
-def _get_cache_key(image: np.ndarray):
-    return hashlib.md5(image.tobytes()).hexdigest()
def get_cache(image: np.ndarray):
    """Return the cached result stored for ``image``, or ``None`` on a miss.

    A hit also marks the entry as most recently used by moving it to the end
    of ``cache_data``. The stored payload is unpickled on every call, so each
    caller receives its own object.
    """
    digest = _get_cache_key(image)
    with cache_lock:
        if digest not in cache_data:
            return None
        cache_data.move_to_end(digest)
        return pickle.loads(cache_data[digest])
def set_cache(image: np.ndarray, result: OCRResult):
    """Store ``result`` in the in-memory cache under the digest of ``image``.

    The result is kept as a pickled payload. When the key is already present
    its payload is replaced and the entry becomes the most recently used one;
    no other entry is evicted in that case. Only when a new key arrives and
    the cache already holds ``cache_max_size`` entries is the oldest entry
    dropped.

    Args:
        image: Image the result belongs to; used only to derive the key.
        result: OCR result to remember.
    """
    key = _get_cache_key(image)
    # Serialise before taking the lock: it keeps the critical section short
    # and leaves the cache untouched if pickling fails.
    payload = pickle.dumps(result)
    with cache_lock:
        if key in cache_data:
            cache_data.move_to_end(key)
        elif cache_data and len(cache_data) >= cache_max_size:
            cache_data.popitem(last=False)
        cache_data[key] = payload
# --- PersianOCR functions (Main logic) ---
# Baseline options; per-call settings are layered on top of a copy of this dict.
default_ocr_settings = dict(
    resize=True,
    resize_scale=200,
    enhance_contrast=True,
    reduce_noise=True,
    sharpen=True,
    deskew=True,
    optimize_for_ocr=True,
    extract_images=True,
    cache_enabled=True,
    max_workers=4,
)
def process_single_image(image: Union[str, np.ndarray], settings: Optional[Dict] = None, progress=None):
    """Run OCR on one image, retrying on an enhanced copy when needed.

    The flow is:

    1. A string argument is treated as a file path, loaded with OpenCV and
       converted from BGR to RGB.
    2. With ``cache_enabled`` set, a cached result for the image is returned
       immediately.
    3. ``process_image`` is tried on the image as-is; a result containing
       text is cached and returned.
    4. Otherwise the image goes through ``enhance_for_persian`` and
       ``process_image`` is tried again; that result is returned with the
       preprocessing details attached (and cached under the original image).

    Args:
        image: File path or image array.
        settings: Optional overrides layered over ``default_ocr_settings``.
        progress: Optional progress callable forwarded to the helpers.

    Returns:
        An ``OCRResult``. Failures never raise: an empty ``OCRResult`` is
        returned instead, with model name ``"None"`` when nothing could be
        read and ``"Error"`` when an exception occurred.
    """
    start_time = time.time()
    current_settings = default_ocr_settings.copy()
    if settings:
        current_settings.update(settings)

    def _empty_result(model_name, elapsed, info):
        # Single place that knows the layout of a "nothing recognised" result.
        return OCRResult([], [], 0.0, model_name, elapsed, "Unknown", "Unknown", 0, 0, info, 0.0)

    try:
        if isinstance(image, str):
            # Keep the path in its own name so it is still available for the
            # error message after the load attempt.
            image_path = image
            logger.debug(f"Loading image from path: {image_path}")
            image = cv2.imread(image_path)
            if image is None:
                logger.error(f"FAILURE Failed to load image from path: {image_path}")
                return _empty_result("None", 0.0, {})
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            logger.debug(f"Image loaded with shape: {image.shape}")
        if current_settings.get('cache_enabled'):
            cached_result = get_cache(image)
            if cached_result is not None:
                logger.info("SUCCESS Cache hit")
                return cached_result
        # First attempt: the untouched image.
        result = process_image(image, progress)
        if result and result.text:
            if current_settings.get('cache_enabled'):
                set_cache(image, result)
            return result
        # Second attempt: the enhanced image.
        processed_image, preprocess_info = enhance_for_persian(image, current_settings, progress)
        if processed_image is None:
            return _empty_result("None", time.time() - start_time, preprocess_info)
        result = process_image(processed_image, progress)
        if result:
            result = result._replace(preprocessing_info=preprocess_info)
            if current_settings.get('cache_enabled'):
                # Keyed by the original image so the next lookup hits.
                set_cache(image, result)
            return result
        return _empty_result("None", time.time() - start_time, preprocess_info)
    except Exception as e:
        logger.error(f"FAILURE Image processing failed: {str(e)}")
        return _empty_result("Error", time.time() - start_time, {})
def process_pdf(pdf_path: str, settings: Optional[Dict] = None, progress=None):
    """Run OCR over every image obtained from a PDF.

    The PDF is first passed through ``optimize_pdf_document``, then split into
    images by ``process_pdf_document``; each image is handed to
    ``process_single_image`` on a thread pool.

    Args:
        pdf_path: Path of the PDF file.
        settings: Optional overrides layered over ``default_ocr_settings``;
            ``max_workers`` sets the pool size.
        progress: Optional progress callable forwarded to the helpers.

    Returns:
        A non-empty list of ``OCRResult``. Results that contain text are
        listed in the order ``process_pdf_document`` returned their images.
        When nothing could be read the list holds a single empty result whose
        model name is ``"None"``, or ``"Error"`` if an exception occurred.
    """
    current_settings = default_ocr_settings.copy()
    if settings:
        current_settings.update(settings)

    def _placeholder(model_name):
        # Empty result used for every "nothing to report" outcome.
        return [OCRResult([], [], 0.0, model_name, 0.0, "Unknown", "Unknown", 0, 0, {}, 0.0)]

    logger.info(f"START Processing PDF: {pdf_path}")
    if not pdf_path or not os.path.exists(pdf_path):
        logger.error(f"FAILURE PDF file not found or invalid: {pdf_path}")
        return _placeholder("None")
    try:
        optimized_pdf = optimize_pdf_document(pdf_path, current_settings, progress)
        if not optimized_pdf or not os.path.exists(optimized_pdf):
            logger.error(f"FAILURE Optimized PDF not generated: {optimized_pdf}")
            return _placeholder("None")
        images = process_pdf_document(optimized_pdf, current_settings, progress)
        if not images:
            logger.warning("WARNING No images extracted from PDF")
            return _placeholder("None")
        results = []
        with ThreadPoolExecutor(max_workers=current_settings.get('max_workers')) as executor:
            futures = [
                executor.submit(process_single_image, img, current_settings, progress)
                for img in images
                if img is not None
            ]
            # Walk the futures in submission order (not completion order) so
            # the collected text follows the order of the input images; the
            # work itself still runs concurrently.
            for future in futures:
                result = future.result()
                if result and result.text:
                    results.append(result)
        logger.info(f"SUCCESS Processed {len(results)} pages")
        return results if results else _placeholder("None")
    except Exception as e:
        logger.error(f"FAILURE PDF processing failed: {str(e)}")
        return _placeholder("Error")
# --- Gradio user interface ---
def create_gradio_interface():
    """Build the Gradio Blocks UI for the Persian OCR system.

    The UI has a file upload, an accordion of advanced options and seven
    output text boxes (text, numbers, confidence, model, processing time,
    image quality, preprocessing info).

    Returns:
        The assembled ``gr.Blocks`` object; the caller is expected to launch it.
    """
    def _format_info(info):
        # Render a preprocessing-info mapping as "key: value" lines; a missing
        # mapping yields an empty string instead of raising.
        return "\n".join(f"{k}: {v}" for k, v in (info or {}).items())

    def process_file(file, use_cache: bool, preprocessing: bool, confidence: float, scale: int,
                     enhance_contrast: bool, reduce_noise: bool, extract_images: bool,
                     progress=gr.Progress(track_tqdm=True)):
        """Click handler: OCR the uploaded file and return the seven output strings.

        ``progress`` is declared as a default argument because that is how
        Gradio injects a progress tracker bound to the running event.
        """
        if file is None:
            logger.error("FAILURE No file provided")
            return ("", "", "0.0", "None", "0.0", "Unknown", "No file uploaded")
        settings = {
            'cache_enabled': use_cache, 'preprocessing_enabled': preprocessing, 'confidence_threshold': confidence,
            'resize': True, 'resize_scale': scale, 'enhance_contrast': enhance_contrast,
            'reduce_noise': reduce_noise, 'extract_images': extract_images, 'sharpen': True, 'deskew': True,
            'optimize_for_ocr': True
        }
        try:
            # The upload may arrive as a path string or as an object exposing
            # the path through ``.name``; accept both.
            file_path = file if isinstance(file, str) else file.name
            if file_path.lower().endswith('.pdf'):
                results = process_pdf(file_path, settings, progress)
                numbers_combined = []
                for res in results:
                    numbers_combined.extend(str(number) for number in res.numbers)
                preprocess_infos = [_format_info(res.preprocessing_info) for res in results]
                combined_preprocess_info = (
                    "\nPage-wise Preprocessing Info:\n" + "\n\n".join(preprocess_infos)
                    if preprocess_infos else ""
                )
                return (
                    "\n".join(" ".join(res.text) for res in results).strip(),
                    ", ".join(numbers_combined),
                    ", ".join(f"{res.confidence:.2f}" for res in results),
                    ", ".join(str(res.model_name) for res in results),
                    ", ".join(f"{res.processing_time:.2f} seconds" for res in results),
                    ", ".join(str(res.image_quality) for res in results),
                    combined_preprocess_info
                )
            result = process_single_image(file_path, settings, progress)
            if result and result.text:
                return (
                    "\n".join(result.text),
                    ", ".join(str(number) for number in result.numbers),
                    f"{result.confidence:.2f}",
                    result.model_name,
                    f"{result.processing_time:.2f} seconds",
                    result.image_quality,
                    _format_info(result.preprocessing_info)
                )
            return ("", "", "0.0", "None", "0.0", "Unknown", "No text extracted")
        except Exception as e:
            logger.error(f"FAILURE Interface error: {str(e)}")
            return ("", "", "0.0", "Error", "0.0", "Unknown", str(e))

    with gr.Blocks(title="سیستم OCR فارسی پیشرفته") as interface:
        gr.Markdown("# سیستم OCR فارسی پیشرفته")
        with gr.Row():
            # Left column: upload, advanced options and the submit button.
            with gr.Column():
                file_input = gr.File(label="آپلود فایل (تصویر یا PDF)")
                with gr.Accordion("تنظیمات پیشرفته", open=False):
                    use_cache = gr.Checkbox(label="استفاده از کش (Cache)", value=True)
                    preprocessing = gr.Checkbox(label="فعال‌سازی پیش‌پردازش", value=True)
                    confidence = gr.Slider(0.1, 1.0, value=0.7, label="آستانه اطمینان (Confidence Threshold)")
                    scale = gr.Slider(100, 400, value=200, step=50, label="مقیاس تصویر (%)")
                    enhance_contrast = gr.Checkbox(label="بهبود کنتراست", value=True)
                    reduce_noise = gr.Checkbox(label="کاهش نویز", value=True)
                    extract_images = gr.Checkbox(label="استخراج تصاویر از PDF", value=True)
                submit_btn = gr.Button("پردازش متن")
            # Right column: one text box per value returned by process_file.
            with gr.Column():
                outputs = [
                    gr.Textbox(label="متن استخراج‌شده", lines=10),
                    gr.Textbox(label="اعداد استخراج‌شده", lines=2),
                    gr.Textbox(label="میزان اطمینان (Confidence)"),
                    gr.Textbox(label="مدل OCR استفاده‌شده"),
                    gr.Textbox(label="زمان پردازش"),
                    gr.Textbox(label="کیفیت تصویر"),
                    gr.Textbox(label="اطلاعات پیش‌پردازش", lines=5)
                ]
        submit_btn.click(
            fn=process_file,
            inputs=[file_input, use_cache, preprocessing, confidence, scale, enhance_contrast, reduce_noise, extract_images],
            outputs=outputs
        )
    return interface
# --- Main entry point ---
def main():
    """Prepare the runtime environment and start the Gradio server.

    Installs system dependencies, creates the log and cache directories,
    logs whether CUDA is available and launches the interface on
    ``0.0.0.0:7860``. Any exception is logged and then re-raised.
    """
    try:
        logger.info("START Initializing system")
        setup_system_dependencies()  # install system dependencies
        for directory in ('/app/logs', '/app/cache'):
            os.makedirs(directory, exist_ok=True)
        setup_cache_dir()
        if torch.cuda.is_available():
            device = "GPU"
        else:
            device = "CPU"
        logger.info(f"SUCCESS Using {device}")
        # Run Gradio for Hugging Face Spaces
        app = create_gradio_interface()
        app.launch(server_name="0.0.0.0", server_port=7860, share=False)
    except Exception as exc:
        logger.error(f"FAILURE Main error: {exc}")
        raise


if __name__ == "__main__":
    main()