Spaces:

Really-amin
/

PersianOCR

Running

App Files Files Community

PersianOCR / appv00v.py

Really-amin

Rename app.py to appv00v.py

9a7ec51 verified 5 months ago

raw

history blame contribute delete

59.3 kB

	# Core Python imports
	import os
	import subprocess
	import logging
	from logging.handlers import RotatingFileHandler
	import time
	from pathlib import Path
	from typing import Dict, List, Optional, Any, Union, Tuple
	from dataclasses import dataclass
	from collections import OrderedDict, namedtuple
	import tempfile
	import shutil
	import hashlib
	import pickle
	import threading
	from concurrent.futures import ThreadPoolExecutor, as_completed
	import warnings
	import io
	import re
	import requests

	# Third-party imports
	import numpy as np
	import cv2
	from PIL import Image
	import pytesseract
	import easyocr
	import torch
	import fitz # PyMuPDF
	from tqdm import tqdm
	import gradio as gr
	from hazm import Normalizer, word_tokenize
	from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
	import psutil
	import tensorflow as tf

	# Configuration and setup
	# Suppress every Python warning for the lifetime of the process.
	warnings.filterwarnings('ignore')
	# NOTE(review): this is assigned after `import tensorflow as tf` has already run;
	# presumably TensorFlow reads the variable at import time, so this may have no
	# effect on native log output — confirm, and move above the import if so.
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	# Limit TensorFlow's Python-side logger to ERROR and above.
	tf.get_logger().setLevel('ERROR')

	# Constants
	# Location of the Tesseract binary and of its language-data directories.
	TESSERACT_CMD = '/usr/bin/tesseract'
	TESSDATA_PREFIX_DEFAULT = '/usr/share/tesseract-ocr/4.00/tessdata'
	TESSDATA_LOCAL = os.path.expanduser('~/.tessdata')  # Writable user directory

	# Status keyword -> emoji that the console formatter prepends when the
	# keyword occurs in a log message. Insertion order is the lookup order.
	STATUS_EMOJIS = dict(
	    START='🟦',
	    SUCCESS='✅',
	    FAILURE='❌',
	    LOADING='⏳',
	    PROCESSING='🔄',
	    WARNING='⚠️',
	    MEMORY='💾',
	)

	# Data Structures
	# Immutable record describing the outcome of one OCR run.
	OCRResult = namedtuple(
	    'OCRResult',
	    'text numbers confidence model_name processing_time '
	    'image_quality detected_language word_count char_count '
	    'preprocessing_info error_rate',
	)

	# Logging Configuration
	class CustomFormatter(logging.Formatter):
	    """Custom formatter for logging with colors and emojis.

	    Each record is rendered with an ANSI colour chosen by level, a
	    ``[Mem: N%]`` suffix holding the current system memory usage, and one
	    emoji prefix per ``STATUS_EMOJIS`` keyword found in the message.
	    """

	    COLORS = {
	        'grey': "\x1b[38;21m",
	        'blue': "\x1b[38;5;39m",
	        'yellow': "\x1b[38;5;226m",
	        'red': "\x1b[38;5;196m",
	        'bold_red': "\x1b[31;1m",
	        'reset': "\x1b[0m"
	    }

	    def __init__(self, fmt):
	        """Store the base format string and prepare one formatter per level.

	        Args:
	            fmt: ``logging``-style format string used for every level.
	        """
	        super().__init__()
	        self.fmt = fmt
	        self.FORMATS = {
	            logging.DEBUG: self.COLORS['grey'] + self.fmt + self.COLORS['reset'],
	            logging.INFO: self.COLORS['blue'] + self.fmt + self.COLORS['reset'],
	            logging.WARNING: self.COLORS['yellow'] + self.fmt + self.COLORS['reset'],
	            logging.ERROR: self.COLORS['red'] + self.fmt + self.COLORS['reset'],
	            logging.CRITICAL: self.COLORS['bold_red'] + self.fmt + self.COLORS['reset']
	        }
	        # Built once here instead of allocating a Formatter for every record.
	        self._level_formatters = {
	            level: logging.Formatter(level_fmt)
	            for level, level_fmt in self.FORMATS.items()
	        }
	        # Levels outside the five standard ones keep the layout, uncoloured.
	        self._plain_formatter = logging.Formatter(self.fmt)

	    def format(self, record):
	        """Return the coloured, decorated text for ``record``.

	        ``record.msg`` is decorated only for the duration of this call and
	        then restored: the same record object is handed to every handler of
	        the logger, so a permanent change would leak the suffix and emojis
	        into other handlers and stack them up on repeated formatting.
	        """
	        formatter = self._level_formatters.get(record.levelno, self._plain_formatter)
	        original_msg = record.msg

	        memory_usage = psutil.virtual_memory().percent
	        decorated = f"{original_msg} [Mem: {memory_usage:.1f}%]"
	        for status, emoji in STATUS_EMOJIS.items():
	            if status in decorated:
	                decorated = f"{emoji} {decorated}"

	        record.msg = decorated
	        try:
	            return formatter.format(record)
	        finally:
	            record.msg = original_msg

	# Main OCR System Class
	class OCRSystem:
	"""Main OCR system class handling all OCR operations"""

	def __init__(self):
	    """Wire up logging, cache and model registry, then store default settings."""
	    # Logging goes first: setup_models() writes to self.logger.
	    for initialise in (self.setup_logging, self.setup_cache, self.setup_models):
	        initialise()

	    self.normalizer = Normalizer()

	    # Baseline options; process_pdf() copies these and overlays caller settings.
	    self.default_settings = dict(
	        cache_enabled=True,
	        preprocessing_enabled=True,
	        confidence_threshold=0.7,
	        resize=True,
	        resize_scale=200,
	        enhance_contrast=True,
	        reduce_noise=True,
	        extract_images=True,
	        sharpen=True,
	        deskew=True,
	        optimize_for_ocr=True,
	        max_workers=4,
	    )

	# Setup Methods
	def setup_logging(self):
	    """Initialize logging system.

	    Configures the module logger at DEBUG level with two handlers: a
	    rotating UTF-8 file handler writing ``logs/ocr.log`` (10 MiB per file,
	    five backups) and a console handler using ``CustomFormatter``.
	    """
	    self.logger = logging.getLogger(__name__)
	    self.logger.setLevel(logging.DEBUG)

	    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	    existing = self.logger.handlers

	    # logging.getLogger() returns the same logger object for the same name,
	    # so creating a second OCRSystem must not attach a second set of
	    # handlers (every message would then be emitted twice).
	    if not any(isinstance(h, RotatingFileHandler) for h in existing):
	        os.makedirs('logs', exist_ok=True)
	        file_handler = RotatingFileHandler(
	            'logs/ocr.log',
	            # Was the literal 1010241024 (~1 GB): "10*1024*1024" with the
	            # multiplication signs lost.
	            maxBytes=10 * 1024 * 1024,
	            backupCount=5,
	            encoding='utf-8'
	        )
	        file_handler.setFormatter(logging.Formatter(log_format))
	        self.logger.addHandler(file_handler)

	    if not any(isinstance(h.formatter, CustomFormatter) for h in existing):
	        console_handler = logging.StreamHandler()
	        console_handler.setFormatter(CustomFormatter(log_format))
	        self.logger.addHandler(console_handler)

	def setup_cache(self):
	"""Initialize caching system"""
	self.cache_data = OrderedDict()
	self.cache_max_size = 1000
	self.cache_lock = threading.Lock()
	self.cache_dir = Path('cache')
	self.cache_dir.mkdir(exist_ok=True)

	def setup_models(self):
	"""Initialize OCR models configuration based on available hardware"""
	self.models = {}
	self.model_performance = {
	'microsoft_trocr': {'success': 0, 'fail': 0},
	'pretrained_model': {'success': 0, 'fail': 0},
	'mT5_OCR_fa': {'success': 0, 'fail': 0},
	'LayoutLMv3_fa': {'success': 0, 'fail': 0},
	'easyocr': {'success': 0, 'fail': 0},
	'tesseract': {'success': 0, 'fail': 0},
	'persian_ocr': {'success': 0, 'fail': 0}
	}

	self.device = "cuda" if torch.cuda.is_available() else "cpu"
	self.max_workers = min(4, os.cpu_count() or 1)
	self.logger.info(f"Using device: {self.device}, Max workers: {self.max_workers}")

	self.model_configs = {
	'microsoft_trocr': {
	'name': "microsoft/trocr-base-printed",
	'type': "transformer",
	'threshold': 0.85,
	'device': self.device
	},
	'pretrained_model': {
	'name': "beheshti-ai/TrOCR-fa",
	'type': "transformer",
	'threshold': 0.8,
	'device': self.device
	},
	'mT5_OCR_fa': {
	'name': "aleemeconomist/mT5-OCR-fa",
	'type': "image-to-text",
	'threshold': 0.7,
	'device': self.device
	},
	'LayoutLMv3_fa': {
	'name': "SoheilStar/LayoutLMv3-fa",
	'type': "document-question-answering",
	'threshold': 0.7,
	'device': self.device
	},
	'persian_ocr': {
	'name': "Persian-OCR",
	'type': "custom",
	'threshold': 0.75,
	'device': self.device
	}
	}

	self.model_priority = [
	'microsoft_trocr', 'pretrained_model', 'mT5_OCR_fa', 'LayoutLMv3_fa',
	'easyocr', 'tesseract', 'persian_ocr'
	] if self.device == "cuda" else [
	'microsoft_trocr', 'tesseract', 'easyocr', 'pretrained_model',
	'mT5_OCR_fa', 'LayoutLMv3_fa', 'persian_ocr'
	]

	self.model_lock = threading.Lock()

	def download_tessdata(self, languages=('eng', 'fas')):
	    """Download Tesseract language data if not present.

	    Files are fetched into ``TESSDATA_LOCAL``. Each download is streamed to
	    a ``.part`` file and renamed only once complete, so an interrupted
	    transfer can never be mistaken for a valid ``.traineddata`` file on the
	    next run.

	    Args:
	        languages: Iterable of Tesseract language codes to make available.

	    Returns:
	        False when the target directory cannot be created because of a
	        permission error, True otherwise. Individual download failures are
	        logged but do not change the return value.
	    """
	    tessdata_dir = TESSDATA_LOCAL
	    try:
	        os.makedirs(tessdata_dir, exist_ok=True)
	    except PermissionError as e:
	        self.logger.warning(f"WARNING: Cannot create {tessdata_dir}: {str(e)}. Tesseract may fail without language data.")
	        return False

	    base_url = "https://github.com/tesseract-ocr/tessdata_best/raw/main/"  # Updated URL

	    for lang in languages:
	        file_path = os.path.join(tessdata_dir, f"{lang}.traineddata")
	        if os.path.exists(file_path):
	            self.logger.debug(f"{lang}.traineddata already exists at {file_path}")
	            continue

	        self.logger.info(f"LOADING: Downloading {lang}.traineddata to {file_path}")
	        partial_path = f"{file_path}.part"
	        try:
	            # (connect, read) timeouts keep a stalled server from hanging
	            # start-up forever; the context manager releases the connection.
	            with requests.get(f"{base_url}{lang}.traineddata", stream=True, timeout=(10, 60)) as response:
	                response.raise_for_status()
	                with open(partial_path, 'wb') as f:
	                    for chunk in response.iter_content(chunk_size=8192):
	                        f.write(chunk)
	            os.replace(partial_path, file_path)
	            self.logger.info(f"SUCCESS: Downloaded {lang}.traineddata")
	        except Exception as e:
	            self.logger.error(f"FAILURE: Could not download {lang}.traineddata: {str(e)}")
	            # Drop whatever was written so far.
	            if os.path.exists(partial_path):
	                try:
	                    os.remove(partial_path)
	                except OSError as cleanup_error:
	                    self.logger.warning(f"WARNING: Could not remove {partial_path}: {str(cleanup_error)}")
	    return True

	def setup_system_dependencies(self):
	    """Install and configure system dependencies only if not already installed.

	    Each package is looked up with ``dpkg -l``; missing ones are installed
	    with ``apt-get``. The package index is refreshed at most once, and only
	    when something actually has to be installed. Afterwards Tesseract
	    language data is fetched and ``TESSDATA_PREFIX`` / the pytesseract
	    command path are configured.

	    Returns:
	        True when every step succeeded, False when a command failed or an
	        unexpected error occurred (the error is logged).
	    """
	    self.logger.info("START: Checking and installing system dependencies")
	    try:
	        dependencies = [
	            'tesseract-ocr', 'libopencv-dev',
	            'fontconfig', 'libgl1-mesa-glx', 'libglib2.0-0',
	            'libsm6', 'libxext6', 'libxrender-dev', 'libqt5gui5'
	        ]
	        index_refreshed = False

	        for dep in dependencies:
	            try:
	                result = subprocess.run(['dpkg', '-l', dep],
	                                        capture_output=True,
	                                        text=True)
	                # dpkg lists an installed package on a row that starts with
	                # the "ii" status. The previous test compared "ii" with
	                # whole lines, never matched, and so reinstalled everything.
	                installed = result.returncode == 0 and any(
	                    line.startswith('ii ') for line in result.stdout.splitlines()
	                )
	                if installed:
	                    self.logger.info(f"PROCESSING: {dep} is already installed")
	                    continue

	                if not index_refreshed:
	                    subprocess.run(['apt-get', 'update'], check=True)
	                    index_refreshed = True
	                self.logger.info(f"PROCESSING: Installing {dep} (not found)")
	                subprocess.run(['apt-get', 'install', '-y', dep], check=True)
	                self.logger.info(f"SUCCESS: Installed {dep}")
	            except subprocess.CalledProcessError as e:
	                self.logger.error(f"FAILURE: Failed to install {dep}: {str(e)}")
	                return False

	        # Download Tesseract language data
	        tessdata_downloaded = self.download_tessdata(['eng', 'fas'])
	        tessdata_prefix = TESSDATA_LOCAL if tessdata_downloaded else TESSDATA_PREFIX_DEFAULT
	        os.environ['TESSDATA_PREFIX'] = tessdata_prefix
	        pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

	        version = subprocess.check_output([TESSERACT_CMD, '--version'])
	        self.logger.info(f"SUCCESS: Tesseract installed: {version.decode()[:50]}")
	        if not os.path.exists(os.path.join(tessdata_prefix, 'eng.traineddata')):
	            self.logger.warning(f"WARNING: Tesseract language data not found in {tessdata_prefix}, functionality may be limited")
	        self.logger.info("SUCCESS: System dependencies setup completed")
	        return True

	    except subprocess.CalledProcessError as e:
	        self.logger.error(f"FAILURE: Command execution error: {str(e)}")
	        return False
	    except Exception as e:
	        self.logger.error(f"FAILURE: System dependency setup error: {str(e)}")
	        return False

	# Progress Handling Utility
	def _update_progress(self, step: int, total_steps: int, progress=None):
	"""Safely update progress with robust error handling"""
	if progress is None:
	return
	try:
	progress_value = step / total_steps
	self.logger.debug(f"Updating progress: {progress_value:.3f}")
	progress(progress_value)
	except Exception as e:
	self.logger.warning(f"WARNING Progress update failed: {str(e)}")

	# Model Loading
	def load_model(self, model_name: str, progress=None):
	"""Load a specific OCR model with detailed debugging"""
	with self.model_lock:
	if model_name in self.models:
	self.logger.debug(f"Model {model_name} already loaded")
	return True

	self.logger.info(f"START Loading model: {model_name}")
	try:
	config = self.model_configs.get(model_name)
	if config:
	self.logger.debug(f"Config for {model_name}: {config}")
	if config['type'] == "image-to-text":
	self.logger.debug(f"Loading image-to-text pipeline for {model_name}")
	self.models[model_name] = pipeline(
	config['type'],
	model=config['name'],
	device=config['device']
	)
	elif config['type'] == "document-question-answering":
	self.logger.debug(f"Loading document-question-answering pipeline for {model_name}")
	self.models[model_name] = pipeline(
	config['type'],
	model=config['name'],
	device=config['device']
	)
	elif config['type'] == "transformer":
	self.logger.debug(f"Loading transformer for {model_name}")
	self._update_progress(1, 3, progress)
	processor = TrOCRProcessor.from_pretrained(config['name'])
	self.logger.debug(f"Processor loaded for {model_name}")
	self._update_progress(2, 3, progress)
	model = VisionEncoderDecoderModel.from_pretrained(config['name'])
	self.logger.debug(f"Model loaded for {model_name}")
	self.models[model_name] = {
	'processor': processor,
	'model': model,
	'device': config['device']
	}
	elif config['type'] == "custom" and model_name == "persian_ocr":
	self.logger.debug(f"Setting custom model {model_name}")
	self.models[model_name] = True
	elif model_name == "easyocr":
	self.logger.debug(f"Loading EasyOCR for {model_name}")
	easyocr_cache_dir = os.path.expanduser('~/.EasyOCR')
	if not os.path.exists(easyocr_cache_dir):
	self.logger.info("LOADING: EasyOCR models not found, downloading now...")
	else:
	self.logger.debug("EasyOCR model cache found, skipping download")
	self._update_progress(1, 2, progress)
	self.models[model_name] = easyocr.Reader(
	['fa', 'en'],
	gpu=(self.device == "cuda"),
	download_enabled=True
	)
	if self.device != "cuda":
	self.logger.warning("WARNING: EasyOCR running on CPU, consider GPU for faster processing")
	elif model_name == "tesseract":
	self.logger.debug(f"Configuring Tesseract for {model_name}")
	tessdata_prefix = os.environ.get('TESSDATA_PREFIX', TESSDATA_LOCAL)
	if not os.path.exists(os.path.join(tessdata_prefix, 'eng.traineddata')):
	self.logger.error(f"FAILURE: eng.traineddata not found in {tessdata_prefix}, Tesseract may fail")
	else:
	self.logger.debug(f"Tesseract language data found at {tessdata_prefix}")
	pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
	self.models[model_name] = True

	self.logger.info(f"SUCCESS Model {model_name} loaded")
	self.model_performance[model_name]['success'] += 1
	self._update_progress(3 if config and config['type'] == "transformer" else 2,
	3 if config and config['type'] == "transformer" else 2,
	progress)
	return True

	except Exception as e:
	self.logger.error(f"FAILURE Error loading {model_name}: {str(e)}")
	self.model_performance[model_name]['fail'] += 1
	self._update_progress(3 if config and config['type'] == "transformer" else 2,
	3 if config and config['type'] == "transformer" else 2,
	progress)
	return False

	# Image Processing
	def process_single_image(self, image_path: Union[str, np.ndarray], settings: Dict, progress=None) -> OCRResult:
	    """Process a single image given as a file path or an already decoded array.

	    ``process_pdf`` submits the page images it extracted — arrays, going by
	    the ``process_pdf_document`` return annotation — to this method, so an
	    ``np.ndarray`` is used as-is and only other values are read from disk.

	    Args:
	        image_path: Path of an image file, or a decoded image array.
	        settings: Enhancement options forwarded to ``enhance_for_persian``.
	        progress: Optional callable receiving a completion fraction.

	    Returns:
	        The OCR result with ``preprocessing_info`` filled in, or an empty
	        result when reading, enhancement or recognition fails.
	    """
	    try:
	        if isinstance(image_path, np.ndarray):
	            source = "<in-memory image>"
	            self.logger.debug(f"Processing single image: {source}")
	            image = image_path
	        else:
	            source = image_path
	            self.logger.debug(f"Processing single image: {source}")
	            image = cv2.imread(os.fspath(image_path))

	        if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
	            self.logger.error(f"FAILURE Unable to read or invalid image: {source}")
	            return self._create_empty_result()

	        enhanced_image, preprocessing_info = self.enhance_for_persian(image, settings, progress)
	        if enhanced_image is None:
	            self.logger.error(f"FAILURE Image enhancement failed: {source}")
	            return self._create_empty_result()

	        self.logger.debug(f"Calling process_image with enhanced image shape: {enhanced_image.shape}")
	        result = self.process_image(enhanced_image, progress)
	        self.logger.debug(f"process_image returned: {result}")

	        if result:
	            return result._replace(preprocessing_info=preprocessing_info)

	        return self._create_empty_result()

	    except Exception as e:
	        self.logger.error(f"FAILURE Error processing image: {str(e)}")
	        return self._create_empty_result()

	def process_image(self, image: np.ndarray, progress=None) -> Optional[OCRResult]:
	"""Process an image using available OCR models"""
	start_time = time.time()
	self.logger.debug("Starting process_image")
	self._update_progress(0, len(self.model_priority) + 1, progress)

	if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
	self.logger.error("FAILURE Input image is invalid or empty")
	raise ValueError("Invalid or empty input image")

	self.logger.debug(f"Processing image with shape: {image.shape}")

	sorted_models = sorted(
	self.model_priority,
	key=lambda x: self.model_performance[x]['success'] / (self.model_performance[x]['fail'] + 1),
	reverse=True
	)
	self.logger.debug(f"Sorted models: {sorted_models}")

	for i, model_name in enumerate(sorted_models):
	try:
	self.logger.debug(f"Attempting to load model: {model_name}")
	if not self.load_model(model_name, progress):
	self.logger.warning(f"WARNING Failed to load model: {model_name}")
	continue

	self._update_progress(i + 1, len(sorted_models) + 1, progress)

	self.logger.debug(f"Processing with model: {model_name}")
	result = self._process_with_model(image, model_name)
	self.logger.debug(f"Model {model_name} result: {result}")

	if result and result.get('text', '').strip():
	processing_time = time.time() - start_time
	self.logger.debug(f"Formatting result for {model_name}")
	ocr_result = self._format_result(
	result['text'],
	result.get('confidence', 0.5),
	model_name,
	processing_time
	)
	self.logger.debug(f"Formatted OCR result: {ocr_result}")

	threshold = self.model_configs.get(model_name, {}).get('threshold', 0.5)
	if ocr_result.confidence >= threshold:
	self.logger.info(f"SUCCESS Model {model_name} succeeded")
	self._update_progress(len(sorted_models) + 1, len(sorted_models) + 1, progress)
	return ocr_result

	except Exception as e:
	self.logger.warning(f"WARNING Model {model_name} failed: {str(e)}")
	continue

	self.logger.warning("WARNING No model succeeded")
	self._update_progress(len(sorted_models) + 1, len(sorted_models) + 1, progress)
	return None

	# Model-Specific Processing
	def _process_with_model(self, image: np.ndarray, model_name: str) -> Dict:
	"""Process image with a specific model"""
	if image is None or not isinstance(image, np.ndarray) or image.size == 0:
	self.logger.error(f"FAILURE Invalid image for {model_name}")
	return {'text': '', 'confidence': 0}

	if model_name in self.model_configs:
	config = self.model_configs[model_name]
	if config['type'] == "transformer":
	return self._process_transformer_model(image, model_name)
	elif config['type'] in ["image-to-text", "document-question-answering"]:
	return self._process_pipeline_model(image, model_name)
	elif model_name == 'persian_ocr':
	return self._process_persian_ocr(image)
	elif model_name == 'easyocr':
	return self._process_easyocr(image)
	elif model_name == 'tesseract':
	return self._process_tesseract(image)
	return {'text': '', 'confidence': 0}

	def _process_transformer_model(self, image: np.ndarray, model_name: str) -> Dict:
	    """Process image using transformer-based model (e.g., Microsoft TrOCR).

	    Returns:
	        A dict with the decoded ``text`` and, as ``confidence``, the model's
	        configured threshold (no score is computed for the generated text).
	        Empty text and zero confidence when anything raises.
	    """
	    try:
	        # enhance_for_persian() converts images to a single channel, and
	        # COLOR_BGR2RGB raises on such input, so pick the conversion that
	        # matches the channel layout.
	        is_single_channel = image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1)
	        conversion = cv2.COLOR_GRAY2RGB if is_single_channel else cv2.COLOR_BGR2RGB
	        pil_image = Image.fromarray(cv2.cvtColor(image, conversion))

	        model_data = self.models[model_name]
	        processor = model_data['processor']
	        model = model_data['model']
	        # Follow the device the weights are actually on; fall back to the
	        # recorded one when the model object does not expose it.
	        device = getattr(model, 'device', model_data['device'])

	        pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values.to(device)
	        generated_ids = model.generate(pixel_values)
	        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	        return {
	            'text': generated_text,
	            'confidence': self.model_configs[model_name]['threshold']
	        }
	    except Exception as e:
	        self.logger.error(f"FAILURE Transformer model processing failed: {str(e)}")
	        return {'text': '', 'confidence': 0}

	def _process_pipeline_model(self, image: np.ndarray, model_name: str) -> Dict:
	    """Process image using pipeline model.

	    Handles the ``image-to-text`` and ``document-question-answering``
	    pipeline types stored in ``self.models``.

	    Returns:
	        A dict with the extracted ``text`` and, as ``confidence``, the
	        model's configured threshold. Empty text and zero confidence when
	        anything raises.
	    """
	    try:
	        # enhance_for_persian() converts images to a single channel, and
	        # COLOR_BGR2RGB raises on such input, so pick the conversion that
	        # matches the channel layout.
	        is_single_channel = image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1)
	        conversion = cv2.COLOR_GRAY2RGB if is_single_channel else cv2.COLOR_BGR2RGB
	        pil_image = Image.fromarray(cv2.cvtColor(image, conversion))
	        model = self.models[model_name]
	        confidence = self.model_configs[model_name]['threshold']

	        if self.model_configs[model_name]['type'] == "image-to-text":
	            result = model(pil_image)[0]
	            return {'text': result['generated_text'], 'confidence': confidence}

	        # document-question-answering
	        # NOTE(review): no question is passed to the pipeline here; this
	        # task presumably requires one — confirm against the pipeline docs.
	        result = model(pil_image)
	        if isinstance(result, list):
	            # Some pipelines return a list of candidate answers.
	            result = result[0]
	        return {'text': result['answer'], 'confidence': confidence}
	    except Exception as e:
	        self.logger.error(f"FAILURE Pipeline model processing failed: {str(e)}")
	        return {'text': '', 'confidence': 0}

	def _process_easyocr(self, image: np.ndarray) -> Dict:
	"""Process image using EasyOCR"""
	try:
	results = self.models['easyocr'].readtext(image)
	if not results:
	return {'text': '', 'confidence': 0}

	texts = [text for _, text, _ in results]
	confidence_sum = sum(conf for _, _, conf in results)
	confidence_avg = confidence_sum / len(results) if results else 0

	return {
	'text': ' '.join(texts),
	'confidence': confidence_avg
	}
	except Exception as e:
	self.logger.error(f"FAILURE EasyOCR processing failed: {str(e)}")
	return {'text': '', 'confidence': 0}

	def _process_tesseract(self, image: np.ndarray) -> Dict:
	    """Read text with Tesseract using the combined ``fas+eng`` languages.

	    Returns:
	        A dict with the recognised ``text`` and a fixed ``confidence`` of
	        0.5; empty text and zero confidence when Tesseract raises.
	    """
	    tesseract_options = '--oem 3 --psm 6 -l fas+eng'
	    try:
	        recognised = pytesseract.image_to_string(image, config=tesseract_options)
	        return {'text': recognised, 'confidence': 0.5}
	    except Exception as exc:
	        self.logger.error(f"FAILURE Tesseract processing failed: {str(exc)}")
	        return {'text': '', 'confidence': 0}

	def _process_persian_ocr(self, image: np.ndarray) -> Dict:
	"""Process image using Persian OCR"""
	try:
	if image is None or not isinstance(image, np.ndarray) or image.size == 0:
	return {'text': '', 'confidence': 0}

	text = self.persian_ocr_main(image, langs="fa", mode="tn")
	return {'text': text, 'confidence': 0.75}
	except Exception as e:
	self.logger.error(f"FAILURE Persian-OCR processing failed: {str(e)}")
	return {'text': '', 'confidence': 0}

	# Result Formatting
	def _format_result(self, text: str, confidence: float, model_name: str, processing_time: float) -> OCRResult:
	    """Turn raw recognised text into a populated ``OCRResult``.

	    The text is normalised and tokenised; tokens that begin with a number
	    (ASCII or Persian digits, optionally with a decimal part) go to
	    ``numbers`` and all others to ``text``. Quality, language and error-rate
	    fields are derived from the non-numeric tokens. An empty result is
	    returned when anything raises.
	    """
	    try:
	        tokens = word_tokenize(self.normalizer.normalize(text))

	        persian_nums = '۰۱۲۳۴۵۶۷۸۹'
	        number_pattern = f'^[0-9{persian_nums}]+([\\.,،٫][0-9{persian_nums}]+)?'

	        numbers = []
	        text_list = []
	        for token in tokens:
	            bucket = numbers if re.match(number_pattern, token) else text_list
	            bucket.append(token)

	        char_total = 0
	        for token in text_list:
	            char_total += len(token)

	        return OCRResult(
	            text=text_list,
	            numbers=numbers,
	            confidence=confidence,
	            model_name=model_name,
	            processing_time=processing_time,
	            image_quality=self._assess_quality(text_list),
	            detected_language=self._detect_language(text_list),
	            word_count=len(text_list),
	            char_count=char_total,
	            preprocessing_info={},
	            error_rate=self._estimate_error_rate(text_list, confidence)
	        )
	    except Exception as exc:
	        self.logger.error(f"FAILURE Formatting result failed: {str(exc)}")
	        return self._create_empty_result()

	def _estimate_error_rate(self, text_list: List[str], confidence: float) -> float:
	"""Estimate error rate based on text characteristics and confidence"""
	if not text_list:
	return 1.0

	avg_word_length = sum(len(w) for w in text_list) / len(text_list)
	return max(0.0, min(1.0, 1.0 - confidence + (3 - avg_word_length) / 10))

	def _assess_quality(self, text_list: List[str]) -> str:
	"""Assess the quality of extracted text"""
	if not text_list:
	return "Low"

	avg_word_length = sum(len(w) for w in text_list) / len(text_list)
	word_count = len(text_list)

	if word_count > 50 and avg_word_length > 3:
	return "High"
	elif word_count > 20 and avg_word_length > 2:
	return "Medium"
	else:
	return "Low"

	def _detect_language(self, text_list: List[str]) -> str:
	"""Detect the dominant language in the text"""
	if not text_list:
	return "Unknown"

	persian_pattern = re.compile(r'[\u0600-\u06FF]')
	english_pattern = re.compile(r'[a-zA-Z]')

	persian_chars = sum(1 for word in text_list
	for _ in persian_pattern.finditer(word))
	english_chars = sum(1 for word in text_list
	for _ in english_pattern.finditer(word))

	if persian_chars > english_chars:
	return "Persian"
	elif english_chars > persian_chars:
	return "English"
	else:
	return "Mixed"

	# Persian OCR Specific
	def persian_ocr_main(self, image: np.ndarray, langs="fa", mode="tn") -> str:
	"""Main Persian OCR processing function"""
	if image is None or not isinstance(image, np.ndarray) or image.size == 0:
	self.logger.error("FAILURE Invalid image for Persian OCR")
	return ""

	try:
	with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_input:
	temp_input_path = temp_input.name
	cv2.imwrite(temp_input_path, image)

	with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as temp_output:
	temp_output_path = temp_output.name

	im = Image.open(temp_input_path)
	length_x, width_y = im.size
	factor = float(1024.0 / length_x)
	size = int(factor * length_x), int(factor * width_y)
	image_resize = im.resize(size, Image.Resampling.LANCZOS)
	image_resize.save(f"{temp_input_path}_Upscaled.png", dpi=(300, 300))

	img = cv2.imread(f"{temp_input_path}_Upscaled.png")
	if img is None:
	self.logger.error("FAILURE Failed to read upscaled image")
	return ""

	gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	if gray is None:
	self.logger.error("FAILURE Failed to convert to grayscale")
	return ""

	if langs == "fa":
	if mode == "t":
	custom_config = r'-l fas --psm 6 -c tessedit_char_blacklist="۰١۲۳۴۵۶۷۸۹«»1234567890#"'
	elif mode == "tn":
	custom_config = r'-l fas --psm 6 -c tessedit_char_whitelist="آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهی ۰١۲۳۴۵۶۷۸۹.?!,،:;/"'
	elif mode == "table":
	custom_config = r'-l fas --psm 6 -c tessedit_char_whitelist="آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهی۰١۲۳۴۵۶۷۸۹"'
	elif langs == "en":
	custom_config = r'-l eng --psm 6'
	elif langs == "faen":
	custom_config = r'-l fas+eng --psm 6'
	else:
	raise ValueError("Invalid language option")

	text = pytesseract.image_to_string(gray, config=custom_config)

	with io.open(temp_output_path, 'w', encoding='utf8') as f:
	f.write(text)

	return text

	except Exception as e:
	self.logger.error(f"FAILURE Persian OCR failed: {str(e)}")
	return ""
	finally:
	for file in [temp_input_path, f"{temp_input_path}_Upscaled.png", temp_output_path]:
	try:
	os.remove(file)
	except Exception:
	pass

	# Image Enhancement
	def enhance_for_persian(self, image: np.ndarray, settings: Dict[str, Any], progress=None) -> Tuple[Optional[np.ndarray], Dict]:
	"""Enhance image for Persian text recognition with robust validation and debugging"""
	info = {}
	if image is None or not isinstance(image, np.ndarray) or image.size == 0 or len(image.shape) < 2:
	self.logger.error("FAILURE Invalid input image for enhancement")
	return None, {}

	self.logger.debug(f"Enhancing image with shape: {image.shape}")
	try:
	processed = image.copy()
	if processed is None or not isinstance(processed, np.ndarray) or processed.size == 0:
	self.logger.error("FAILURE Failed to create image copy")
	return None, info
	self.logger.debug(f"Initial processed shape: {processed.shape}")

	step = 0
	total_steps = 7

	# Step 1: Convert to grayscale
	if len(processed.shape) == 3:
	self.logger.debug("Starting grayscale conversion")
	try:
	processed = cv2.cvtColor(processed, cv2.COLOR_BGR2GRAY)
	if processed is None:
	self.logger.error("FAILURE Grayscale conversion returned None")
	return None, info
	if len(processed.shape) != 2:
	self.logger.error(f"FAILURE Grayscale conversion produced invalid shape: {processed.shape}")
	return None, info
	self.logger.debug(f"After grayscale shape: {processed.shape}")
	info['grayscale'] = True
	except Exception as e:
	self.logger.error(f"FAILURE Grayscale conversion failed: {str(e)}")
	return None, info
	else:
	self.logger.debug("Image already grayscale, skipping conversion")
	step += 1
	self._update_progress(step, total_steps, progress)

	# Step 2: Resize
	if settings.get('resize'):
	scale_percent = settings.get('resize_scale', 200)
	if scale_percent != 100:
	if len(processed.shape) < 2:
	self.logger.error("FAILURE Invalid shape for resize")
	return None, info
	self.logger.debug(f"Starting resize with scale {scale_percent}%")
	h, w = processed.shape[:2]
	new_w = int(w * scale_percent / 100)
	new_h = int(h * scale_percent / 100)
	if new_w <= 0 or new_h <= 0:
	self.logger.error(f"FAILURE Invalid resize dimensions: {new_w}x{new_h}")
	return None, info
	try:
	processed = cv2.resize(processed, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Resize operation returned invalid result")
	return None, info
	self.logger.debug(f"After resize shape: {processed.shape}")
	info['resized'] = f"{scale_percent}%"
	except Exception as e:
	self.logger.error(f"FAILURE Resize failed: {str(e)}")
	return None, info
	step += 1
	self._update_progress(step, total_steps, progress)

	# Step 3: Contrast Enhancement
	if settings.get('enhance_contrast'):
	if len(processed.shape) < 2:
	self.logger.error("FAILURE Invalid shape for contrast enhancement")
	return None, info
	self.logger.debug("Starting contrast enhancement")
	try:
	clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
	processed = clahe.apply(processed)
	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Contrast enhancement returned invalid result")
	return None, info
	self.logger.debug(f"After contrast enhancement shape: {processed.shape}")
	info['contrast_enhanced'] = True
	except Exception as e:
	self.logger.error(f"FAILURE Contrast enhancement failed: {str(e)}")
	return None, info
	step += 1
	self._update_progress(step, total_steps, progress)

	# Step 4: Noise Reduction
	if settings.get('reduce_noise'):
	if len(processed.shape) < 2:
	self.logger.error("FAILURE Invalid shape for noise reduction")
	return None, info
	self.logger.debug("Starting noise reduction")
	try:
	processed = cv2.bilateralFilter(processed, 9, 75, 75)
	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Noise reduction returned invalid result")
	return None, info
	self.logger.debug(f"After noise reduction shape: {processed.shape}")
	info['noise_reduced'] = True
	except Exception as e:
	self.logger.error(f"FAILURE Noise reduction failed: {str(e)}")
	return None, info
	step += 1
	self._update_progress(step, total_steps, progress)

	# Step 5: Sharpening
	if settings.get('sharpen'):
	if len(processed.shape) < 2:
	self.logger.error("FAILURE Invalid shape for sharpening")
	return None, info
	self.logger.debug("Starting sharpening")
	kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
	try:
	processed = cv2.filter2D(processed, -1, kernel)
	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Sharpening returned invalid result")
	return None, info
	self.logger.debug(f"After sharpening shape: {processed.shape}")
	info['sharpened'] = True
	except Exception as e:
	self.logger.error(f"FAILURE Sharpening failed: {str(e)}")
	return None, info
	step += 1
	self._update_progress(step, total_steps, progress)

	# Step 6: Deskew
	if settings.get('deskew'):
	if len(processed.shape) < 2:
	self.logger.error("FAILURE Invalid shape for deskew")
	return None, info
	self.logger.debug("Starting deskew")
	coords = np.column_stack(np.where(processed > 0))
	self.logger.debug(f"Coords shape: {coords.shape}")
	if coords.size >= 5:
	try:
	rect = cv2.minAreaRect(coords)
	self.logger.debug(f"Rect: {rect}")
	if not (isinstance(rect, tuple) and len(rect) == 3 and
	isinstance(rect[0], tuple) and isinstance(rect[1], tuple) and isinstance(rect[2], (int, float))):
	self.logger.warning("WARNING Invalid rect structure from minAreaRect")
	else:
	angle = rect[2]
	if angle < -45:
	angle = 90 + angle
	h, w = processed.shape[:2]
	center = (w // 2, h // 2)
	M = cv2.getRotationMatrix2D(center, angle, 1.0)
	processed = cv2.warpAffine(
	processed, M, (w, h),
	flags=cv2.INTER_CUBIC,
	borderMode=cv2.BORDER_REPLICATE
	)
	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Deskew operation returned invalid result")
	return None, info
	self.logger.debug(f"After deskew shape: {processed.shape}")
	info['deskewed'] = f"angle: {angle:.2f}"
	except Exception as e:
	self.logger.warning(f"WARNING Deskew failed: {str(e)}")
	else:
	self.logger.warning("WARNING Not enough points for deskewing (coords.size < 5)")
	step += 1
	self._update_progress(step, total_steps, progress)

	# Step 7: Thresholding
	if settings.get('threshold'):
	if len(processed.shape) < 2:
	self.logger.error("FAILURE Invalid shape for thresholding")
	return None, info
	self.logger.debug("Starting thresholding")
	try:
	processed = cv2.adaptiveThreshold(
	processed, 255,
	cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	cv2.THRESH_BINARY, 11, 2
	)
	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Thresholding returned invalid result")
	return None, info
	self.logger.debug(f"After thresholding shape: {processed.shape}")
	info['thresholded'] = True
	except Exception as e:
	self.logger.error(f"FAILURE Thresholding failed: {str(e)}")
	return None, info
	step += 1
	self._update_progress(step, total_steps, progress)

	if processed is None or len(processed.shape) < 2:
	self.logger.error("FAILURE Final image has invalid shape")
	return None, info
	self.logger.debug(f"Final processed shape: {processed.shape}")
	return processed, info

	except Exception as e:
	self.logger.error(f"FAILURE Image enhancement error: {str(e)}")
	self._update_progress(total_steps, total_steps, progress)
	return None, {}

	# PDF Processing
	def process_pdf(self, pdf_path: str, settings: Optional[Dict] = None, progress=None) -> List[OCRResult]:
	    """Process a PDF document and extract text from all pages.

	    The PDF is optimised, its page images are extracted, and each image is
	    recognised on a thread pool. Results are collected in submission order
	    so that they follow the order of the extracted images.

	    Args:
	        pdf_path: Path to a file whose name ends in ``.pdf``.
	        settings: Optional overrides applied on top of ``default_settings``.
	        progress: Optional callable receiving a completion fraction.

	    Returns:
	        One ``OCRResult`` per image that produced text, or a single empty
	        result when the file is missing, is not a PDF, yields no text, or
	        processing raises.
	    """
	    self.logger.info(f"START Processing PDF: {pdf_path}")
	    results = []
	    current_settings = self.default_settings.copy()
	    if settings:
	        current_settings.update(settings)

	    if not pdf_path or not os.path.exists(pdf_path):
	        self.logger.error(f"FAILURE PDF file not found: {pdf_path}")
	        return [self._create_empty_result()]

	    if not pdf_path.lower().endswith('.pdf'):
	        self.logger.error(f"FAILURE Input file is not a PDF: {pdf_path}")
	        return [self._create_empty_result()]

	    try:
	        self.logger.debug(f"Calling optimize_pdf_document with path: {pdf_path}")
	        optimized_pdf = self.optimize_pdf_document(pdf_path, current_settings, progress)
	        self.logger.debug(f"optimize_pdf_document returned: {optimized_pdf}")
	        if not optimized_pdf or not os.path.exists(optimized_pdf):
	            self.logger.error(f"FAILURE Optimized PDF not generated: {optimized_pdf}")
	            return [self._create_empty_result()]

	        self.logger.debug(f"Calling process_pdf_document with path: {optimized_pdf}")
	        images = self.process_pdf_document(optimized_pdf, current_settings, progress)
	        self.logger.debug(f"process_pdf_document returned {len(images)} images")
	        if not images:
	            self.logger.warning("WARNING No images extracted from PDF")
	            return [self._create_empty_result()]

	        with ThreadPoolExecutor(max_workers=current_settings.get('max_workers', 4)) as executor:
	            futures = [
	                executor.submit(self.process_single_image, img, current_settings, progress)
	                for img in images if img is not None
	            ]
	            # Iterating the futures in the order they were submitted keeps
	            # the results aligned with `images` (presumably page order);
	            # as_completed() returned them in whatever order the workers
	            # finished. The work still runs concurrently.
	            for future in futures:
	                result = future.result()
	                self.logger.debug(f"Thread result: {result}")
	                if result and result.text:
	                    results.append(result)

	        self.logger.info(f"SUCCESS Processed {len(results)} pages")
	        return results if results else [self._create_empty_result()]

	    except Exception as e:
	        self.logger.error(f"FAILURE PDF processing failed: {str(e)}")
	        return [self._create_empty_result()]

	def _create_empty_result(self) -> OCRResult:
	    """Build the placeholder ``OCRResult`` used when nothing was recognised.

	    Text and numbers are empty lists, counters and confidence are zero,
	    descriptive fields read "None"/"Unknown" and ``error_rate`` is 1.0.
	    """
	    blank_fields = {
	        'text': [],
	        'numbers': [],
	        'confidence': 0.0,
	        'model_name': "None",
	        'processing_time': 0.0,
	        'image_quality': "Unknown",
	        'detected_language': "Unknown",
	        'word_count': 0,
	        'char_count': 0,
	        'preprocessing_info': {},
	        'error_rate': 1.0,
	    }
	    return OCRResult(**blank_fields)

	def process_pdf_document(self, pdf_path: str, settings: Dict[str, Any], progress=None) -> List[np.ndarray]:
	    """Open a PDF and collect the images produced for each of its pages.

	    Pages are handled in batches of ``settings['batch_size']`` (default 2);
	    each page of a batch is handed to ``_process_pdf_page`` on a thread pool.

	    Args:
	        pdf_path: Path of the PDF to open.
	        settings: Processing options; forwarded unchanged to
	            ``_process_pdf_page``.
	        progress: Optional progress callback forwarded to
	            ``_process_pdf_page``.

	    Returns:
	        All images returned by the page workers, in page order. An empty
	        list when opening or processing the document raises.
	    """
	    self.logger.info(f"START Processing PDF: {pdf_path}")
	    all_images = []
	    doc = None

	    try:
	        doc = fitz.open(pdf_path)
	        total_pages = len(doc)
	        # A batch size below 1 would make range() raise and drop the whole
	        # document, so clamp it.
	        batch_size = max(1, int(settings.get('batch_size', 2)))

	        # NOTE(review): the same document object is used from several worker
	        # threads; PyMuPDF's documentation warns against multithreaded use.
	        # Confirm this is safe for the installed version.
	        with tqdm(total=total_pages, desc="📄 Processing PDF") as pbar:
	            for start in range(0, total_pages, batch_size):
	                batch = range(start, min(start + batch_size, total_pages))
	                with ThreadPoolExecutor(max_workers=batch_size) as executor:
	                    futures = [
	                        executor.submit(
	                            self._process_pdf_page,
	                            doc, page_num, settings, progress
	                        )
	                        for page_num in batch
	                    ]
	                    # Consume in submission order so images stay in page
	                    # order; as_completed() would interleave them.
	                    for future in futures:
	                        result = future.result()
	                        self.logger.debug(f"Page result: {result}")
	                        if result and isinstance(result, list):
	                            all_images.extend(result)
	                        pbar.update(1)

	        self.logger.info(f"SUCCESS Extracted {len(all_images)} images")
	        return all_images

	    except Exception as e:
	        self.logger.error(f"FAILURE PDF processing failed: {str(e)}")
	        return []

	    finally:
	        # Release the file handle on success and on failure alike.
	        if doc is not None:
	            doc.close()

	def _process_pdf_page(self, doc, page_num: int, settings: Dict[str, Any], progress=None) -> List[np.ndarray]:
	    """Render one page to an image, enhance it and gather embedded images.

	    The page is rasterised with a zoom of ``settings['scale_factor']``
	    (default 2) on both axes and passed through ``enhance_for_persian``.
	    Embedded images are appended unless ``settings['extract_images']`` is
	    falsy (default True).

	    Returns:
	        The images gathered for the page. An empty list when the pixmap is
	        invalid, cannot be converted, or any step raises.
	    """
	    try:
	        page = doc.load_page(page_num)
	        zoom = settings.get('scale_factor', 2)
	        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

	        if not self._validate_pixmap(pix):
	            self.logger.warning(f"WARNING Invalid pixmap for page {page_num + 1}")
	            return []

	        img = self._pixmap_to_image(pix)
	        if img is None:
	            self.logger.warning(f"WARNING Failed to convert pixmap to image for page {page_num + 1}")
	            return []

	        processed_img, _ = self.enhance_for_persian(img, settings, progress)
	        page_images = [] if processed_img is None else [processed_img]

	        if settings.get('extract_images', True):
	            page_images.extend(self._extract_embedded_images(doc, page, page_num))

	        # Progress is reported only once the page has been fully handled.
	        if progress:
	            progress(1.0)
	        return page_images

	    except Exception as e:
	        self.logger.error(f"FAILURE Page {page_num + 1} processing failed: {str(e)}")
	        if progress:
	            progress(1.0)
	        return []

	def _validate_pixmap(self, pix) -> bool:
	"""Validate pixmap data with detailed logging"""
	if not hasattr(pix, 'n') or not hasattr(pix, 'width') or not hasattr(pix, 'height') or not hasattr(pix, 'samples'):
	self.logger.error("FAILURE Pixmap missing required attributes")
	return False
	if pix.n <= 0:
	self.logger.error("FAILURE Pixmap has invalid number of components")
	return False
	if pix.width <= 0 or pix.height <= 0:
	self.logger.error("FAILURE Pixmap has invalid dimensions")
	return False
	if pix.samples is None or len(pix.samples) == 0:
	self.logger.error("FAILURE Pixmap has no sample data")
	return False
	expected_size = pix.width * pix.height * pix.n
	if len(pix.samples) != expected_size:
	self.logger.error(f"FAILURE Pixmap sample size mismatch: expected {expected_size}, got {len(pix.samples)}")
	return False
	return True

	def _pixmap_to_image(self, pix) -> Optional[np.ndarray]:
	"""Convert pixmap to numpy array with validation"""
	try:
	if not self._validate_pixmap(pix):
	return None

	img_data = np.frombuffer(pix.samples, dtype=np.uint8)
	expected_size = pix.width * pix.height * pix.n

	if img_data.size != expected_size:
	self.logger.error(
	f"FAILURE Pixmap data size mismatch: "
	f"expected {expected_size}, got {img_data.size}"
	)
	return None

	reshaped = img_data.reshape(pix.height, pix.width, pix.n)
	if reshaped is None or len(reshaped.shape) < 2:
	self.logger.error("FAILURE Failed to reshape pixmap data")
	return None
	return reshaped

	except Exception as e:
	self.logger.error(f"FAILURE Pixmap conversion failed: {str(e)}")
	return None

	def _extract_embedded_images(self, doc, page, page_num: int) -> List[np.ndarray]:
	    """Extract, enhance and return the images embedded in a PDF page.

	    Each image listed by ``page.get_images(full=True)`` is loaded as a
	    pixmap, converted to RGB when it has four or more colour components,
	    stripped of its alpha channel and, if larger than 100x100 pixels,
	    passed through ``enhance_for_persian`` with empty settings.

	    Args:
	        doc: The open PDF document owning the page.
	        page: The page whose embedded images are extracted.
	        page_num: Zero-based page index, used only in log messages.

	    Returns:
	        The enhanced images. Images that fail validation, are too small,
	        or raise while being processed are skipped.
	    """
	    images = []
	    for img_info in page.get_images(full=True):
	        try:
	            xref = img_info[0]
	            base_image = fitz.Pixmap(doc, xref)

	            # Only colour data with 4+ components (e.g. CMYK) needs
	            # converting; alpha does not count as a colour component.
	            if base_image.n - base_image.alpha >= 4:
	                base_image = fitz.Pixmap(fitz.csRGB, base_image)

	            if not self._validate_pixmap(base_image):
	                continue

	            # Reshape with the real component count. Forcing 3 or 1 channels
	            # raised for every pixmap carrying an alpha channel, so those
	            # images were silently dropped.
	            channels = base_image.n
	            img_array = np.frombuffer(base_image.samples, dtype=np.uint8).reshape(
	                base_image.height, base_image.width, channels
	            )
	            if base_image.alpha and channels > 1:
	                # Drop alpha; copy so downstream code gets contiguous memory.
	                img_array = np.ascontiguousarray(img_array[:, :, :channels - 1])

	            if img_array.shape[0] > 100 and img_array.shape[1] > 100:
	                processed_img, _ = self.enhance_for_persian(img_array, {})
	                if processed_img is not None:
	                    images.append(processed_img)

	        except Exception as e:
	            self.logger.warning(
	                f"WARNING Failed to process embedded image on page {page_num + 1}: {str(e)}"
	            )
	            continue

	    return images

	def optimize_pdf_document(self, pdf_path: str, settings: Dict[str, Any], progress=None) -> str:
	"""Optimize PDF document for OCR processing"""
	self.logger.info(f"START Optimizing PDF: {pdf_path}")
	if not os.path.exists(pdf_path):
	self.logger.error(f"FAILURE PDF file not found: {pdf_path}")
	return pdf_path

	try:
	output_path = str(Path(f"optimized_{Path(pdf_path).name}"))
	doc = fitz.open(pdf_path)
	new_doc = fitz.open()

	total_pages = len(doc)
	for page_num in tqdm(range(total_pages), desc="📄 Optimizing PDF"):
	page = doc.load_page(page_num)
	pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))

	if not self._validate_pixmap(pix):
	continue

	img = self._pixmap_to_image(pix)
	if img is None:
	continue

	processed_img, _ = self.enhance_for_persian(img, settings, progress)
	if processed_img is None:
	continue

	img_path = Path(tempfile.mkdtemp()) / f"temp_page_{page_num}.jpg"
	cv2.imwrite(str(img_path), processed_img)

	temp_doc = fitz.open(str(img_path))
	new_doc.insert_pdf(temp_doc)
	temp_doc.close()
	os.remove(img_path)

	new_doc.save(output_path)
	self.logger.info(f"SUCCESS PDF optimized: {output_path}")
	if progress:
	progress(1.0)
	return output_path

	except Exception as e:
	self.logger.error(f"FAILURE PDF optimization failed: {str(e)}")
	if progress:
	progress(1.0)
	return pdf_path

	# Gradio Interface
	def create_gradio_interface(self):
	    """Build the Gradio Blocks UI for uploading a file and showing OCR output.

	    The left column holds the file upload, an "Advanced Settings" accordion
	    and the submit button; the right column holds seven text boxes that
	    receive the tuple returned by the processing handler.

	    Returns:
	        The ``gr.Blocks`` interface (not yet launched).
	    """
	    def process_file(file, use_cache: bool, preprocessing: bool,
	                     confidence: float, scale: int, enhance_contrast: bool,
	                     reduce_noise: bool, extract_images: bool,
	                     progress=gr.Progress(track_tqdm=True)):
	        """Handle one submit click: dispatch to the PDF or image pipeline.

	        ``progress`` must be a default argument so that Gradio attaches the
	        tracker to the event; an instance created inside the body is never
	        wired to the UI.
	        """
	        if file is None:
	            self.logger.error("FAILURE No file provided")
	            return self._empty_interface_result("No file uploaded")

	        settings = {
	            'cache_enabled': use_cache,
	            'preprocessing_enabled': preprocessing,
	            'confidence_threshold': confidence,
	            'resize': True,
	            'resize_scale': scale,
	            'enhance_contrast': enhance_contrast,
	            'reduce_noise': reduce_noise,
	            'extract_images': extract_images,
	            'sharpen': True,
	            'deskew': True,
	            'optimize_for_ocr': True
	        }

	        try:
	            # Depending on the Gradio version the upload arrives either as
	            # a plain path string or as an object exposing ``.name``.
	            file_path = file if isinstance(file, str) else file.name
	            if file_path.lower().endswith('.pdf'):
	                return self._process_pdf_interface(file_path, settings, progress)
	            else:
	                return self._process_image_interface(file_path, settings, progress)
	        except Exception as e:
	            self.logger.error(f"FAILURE Interface error: {str(e)}")
	            return self._empty_interface_result(str(e))

	    with gr.Blocks(title="Persian OCR System") as interface:
	        gr.Markdown("# Advanced Persian OCR System")

	        with gr.Row():
	            with gr.Column():
	                file_input = gr.File(label="Upload File (Image or PDF)")

	                with gr.Accordion("Advanced Settings", open=False):
	                    use_cache = gr.Checkbox(label="Use Cache", value=True)
	                    preprocessing = gr.Checkbox(label="Enable Preprocessing", value=True)
	                    confidence = gr.Slider(0.1, 1.0, value=0.7, label="Confidence Threshold")
	                    scale = gr.Slider(100, 400, value=200, step=50, label="Image Scale (%)")
	                    enhance_contrast = gr.Checkbox(label="Enhance Contrast", value=True)
	                    reduce_noise = gr.Checkbox(label="Reduce Noise", value=True)
	                    extract_images = gr.Checkbox(label="Extract Images from PDF", value=True)

	                submit_btn = gr.Button("Process Text")

	            with gr.Column():
	                # Order must match the tuples returned by the handlers.
	                outputs = [
	                    gr.Textbox(label="Extracted Text", lines=10),
	                    gr.Textbox(label="Extracted Numbers", lines=2),
	                    gr.Textbox(label="Confidence Level"),
	                    gr.Textbox(label="OCR Model Used"),
	                    gr.Textbox(label="Processing Time"),
	                    gr.Textbox(label="Image Quality"),
	                    gr.Textbox(label="Preprocessing Info", lines=5)
	                ]

	        submit_btn.click(
	            fn=process_file,
	            inputs=[
	                file_input, use_cache, preprocessing,
	                confidence, scale, enhance_contrast,
	                reduce_noise, extract_images
	            ],
	            outputs=outputs
	        )

	    return interface

	def _process_pdf_interface(self, file_path: str, settings: Dict, progress) -> Tuple:
	"""Process PDF file for interface"""
	results = self.process_pdf(file_path, settings, progress)

	full_text = ""
	numbers_combined = []
	confidences = []
	models_used = []
	times = []
	qualities = []
	preprocess_infos = []

	for res in results:
	full_text += "\n" + " ".join(res.text)
	numbers_combined.extend(res.numbers)
	confidences.append(f"{res.confidence:.2f}")
	models_used.append(res.model_name)
	times.append(f"{res.processing_time:.2f} seconds")
	qualities.append(res.image_quality)
	preprocess_infos.append(
	"\n".join([f"{k}: {v}" for k, v in res.preprocessing_info.items()])
	)

	combined_preprocess_info = ("\nPage-wise Preprocessing Info:\n" +
	"\n\n".join(preprocess_infos) if preprocess_infos else "")

	return (
	full_text.strip(),
	", ".join(numbers_combined),
	", ".join(confidences),
	", ".join(models_used),
	", ".join(times),
	", ".join(qualities),
	combined_preprocess_info
	)

	def _process_image_interface(self, file_path: str, settings: Dict, progress) -> Tuple:
	"""Process image file for interface"""
	result = self.process_single_image(file_path, settings, progress)

	if result and result.text:
	preprocess_info = "\n".join([f"{k}: {v}"
	for k, v in result.preprocessing_info.items()]) \
	if result.preprocessing_info else ""

	return (
	"\n".join(result.text),
	", ".join(result.numbers),
	f"{result.confidence:.2f}",
	result.model_name,
	f"{result.processing_time:.2f} seconds",
	result.image_quality,
	preprocess_info
	)

	return self._empty_interface_result("No text extracted")

	def _empty_interface_result(self, message: str) -> Tuple:
	"""Create empty result for interface"""
	return ("", "", "0.0", "None", "0.0", "Unknown", message)

	# System Runner
	def run(self):
	    """Prepare the environment and launch the Gradio server.

	    Calls ``setup_system_dependencies``, creates the ``logs`` and ``cache``
	    directories, builds the interface and launches it on 0.0.0.0:7860 with
	    sharing and debug enabled. Any exception is logged and re-raised.
	    """
	    launch_options = {
	        'share': True,
	        'debug': True,
	        'server_name': "0.0.0.0",
	        'server_port': 7860,
	    }
	    try:
	        self.logger.info("START Initializing system")
	        self.setup_system_dependencies()

	        for directory in ('logs', 'cache'):
	            os.makedirs(directory, exist_ok=True)

	        app = self.create_gradio_interface()
	        app.launch(**launch_options)

	    except Exception as e:
	        self.logger.error(f"FAILURE System initialization failed: {str(e)}")
	        raise

	# Main Execution
	if __name__ == "__main__":
	    # Script entry point: build the OCR system and start its web interface.
	    ocr_system = OCRSystem()
	    ocr_system.run()