import os
import cv2
import torch
from PIL import Image
import numpy as np
# EasyOCR
import easyocr
# TrOCR (Transformer-based OCR from Hugging Face)
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# --- Load TrOCR model and processor once ---
try:
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model.eval()
    use_trocr = True
except Exception as e:
    print(f"TrOCR load failed: {e}")
    use_trocr = False

# --- Load EasyOCR once ---
try:
    reader = easyocr.Reader(['en'])
    use_easyocr = True
except Exception as e:
    print(f"EasyOCR load failed: {e}")
    use_easyocr = False
# --- Preprocess image for EasyOCR ---
def preprocess_image(image):
    # Convert to grayscale if the input is a colour (BGR) image
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    # Denoise, then binarise with an adaptive Gaussian threshold
    denoised = cv2.fastNlMeansDenoising(gray, h=10)
    processed = cv2.adaptiveThreshold(denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY, 11, 2)
    return processed
# --- Preprocess image for TrOCR ---
def preprocess_for_trocr(image_path):
    # Read with OpenCV (BGR), convert to RGB, resize, and return a PIL image for the processor
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (1280, 720), interpolation=cv2.INTER_LINEAR)
    return Image.fromarray(image)
# --- TrOCR extraction ---
def extract_text_with_trocr(image_path):
    image = preprocess_for_trocr(image_path)
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    with torch.no_grad():
        generated_ids = trocr_model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text.strip()
# --- EasyOCR extraction ---
def extract_text_with_easyocr(image_path):
    image = cv2.imread(image_path)
    processed_image = preprocess_image(image)
    # Run EasyOCR on the preprocessed image via a temporary file
    temp_path = os.path.join(os.path.dirname(image_path), f"temp_{os.path.basename(image_path)}")
    cv2.imwrite(temp_path, processed_image)
    results = reader.readtext(temp_path)
    os.remove(temp_path)
    text = ' '.join([res[1] for res in results]).strip()
    # Fall back to the original image if preprocessing yielded no text
    if not text:
        results = reader.readtext(image_path)
        text = ' '.join([res[1] for res in results]).strip()
    return text
# --- Main unified function ---
def extract_text_from_image(image_path):
    """
    Try extracting handwritten text with TrOCR; fall back to EasyOCR if TrOCR is
    unavailable or returns too little text.
    """
    try:
        if use_trocr:
            print("Using TrOCR...")
            trocr_text = extract_text_with_trocr(image_path)
            if trocr_text and len(trocr_text) > 5:
                return trocr_text
            print("TrOCR output too short. Falling back to EasyOCR...")
        if use_easyocr:
            print("Using EasyOCR...")
            return extract_text_with_easyocr(image_path)
        raise Exception("No OCR backend available.")
    except Exception as e:
        print(f"OCR failed: {e}")
        return "Text extraction failed."