# Web-page residue captured with this file (not program code): "Spaces: Running"
import gradio as gr | |
from PIL import Image | |
import tempfile | |
import os | |
from IndicPhotoOCR.ocr import OCR # Ensure OCR class is saved in a file named ocr.py | |
from IndicPhotoOCR.theme import Seafoam | |
from IndicPhotoOCR.utils.helper import detect_para | |
# Accepted values for identifier_lang; extend the list as more are needed.
VALID_IDENTIFIER_LANGS = [
    "hindi",
    "assamese",
    "bengali",
    "gujarati",
    "kannada",
    "malayalam",
    "odia",
    "punjabi",
    "tamil",
    "telugu",
    "auto",
]
def process_image(image, identifier_lang):
    """
    Processes the uploaded image for text detection and recognition.

    - Writes the image to a temporary JPEG so the OCR object can read it by path
    - Detects bounding boxes and has the OCR object draw them into a temporary PNG
    - Recognizes text and joins it into one string (words separated by spaces,
      lines separated by newlines)
    - Removes both temporary files before returning

    Parameters:
        image (PIL.Image or None): The input image to be processed. When no
            image is supplied (None), an empty result is returned instead of
            failing.
        identifier_lang (str): The script identifier model to use.

    Returns:
        tuple: A PIL.Image with bounding boxes (None when no image was given)
        and a string of recognized text ("" when no image was given).
    """
    # The handler can be triggered without an upload; there is nothing to do then.
    if image is None:
        return None, ""

    image_path = None
    output_path = None
    try:
        # Reserve a path for the input image; close the handle before writing
        # to the file by name.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_input:
            image_path = temp_input.name
        image.save(image_path)

        # Initialize OCR with the selected identifier language
        ocr = OCR(identifier_lang=identifier_lang, verbose=False)

        # Detect bounding boxes on the image using OCR
        detections = ocr.detect(image_path)

        # Reserve a path for the annotated image, closing the handle right away
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_output:
            output_path = temp_output.name

        # Draw bounding boxes on the image and save it as output
        ocr.visualize_detection(image_path, detections, save_path=output_path)

        # Image.open is lazy: copy the pixels into memory so the returned image
        # stays usable after the temporary file is deleted below.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        # Recognize text from the detected areas.
        # NOTE(review): assumes ocr.ocr returns an iterable of lines, each an
        # iterable of strings - confirm against the OCR class.
        recognized_lines = ocr.ocr(image_path)
        recognized_text = '\n'.join(' '.join(line) for line in recognized_lines)

        return output_image, recognized_text
    finally:
        # Best-effort cleanup so temporary files do not pile up per request;
        # a failed removal must not mask the real result or error.
        for path in (image_path, output_path):
            if path is not None:
                try:
                    os.remove(path)
                except OSError:
                    pass
# Custom HTML for the interface header: two logos, one in a white box on the
# left and one floated to the right.
interface_html = """
<div style="text-align: left; padding: 10px;">
    <div style="background-color: white; padding: 10px; display: inline-block;">
        <img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
    </div>
    <img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
</div>
"""
# Centered text links to the GitHub code repository and the dataset repository.
links_html = """
<div style="text-align: center; padding-top: 20px;">
    <a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
        GitHub Repository
    </a>
    <a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
        Dataset Repository
    </a>
</div>
"""
# Custom CSS: enlarge the text inside elements carrying the "custom-textbox"
# class and center/embolden the element with id "title".
custom_css = """
.custom-textbox textarea {
    font-size: 20px !important;
}
#title {
    text-align: center;
    font-size: 28px;
    font-weight: bold;
    margin-bottom: 20px;
}
"""
# Create an instance of the Seafoam theme for a consistent visual style
seafoam = Seafoam()
# Clear function
def clear_inputs():
    """
    Return the values that reset the interface.

    Returns:
        tuple: (None, "auto", None, "") - in the order the Clear button wires
        its outputs: input image, identifier language, processed image,
        recognized text.
    """
    return None, "auto", None, ""
# Define the Gradio Blocks interface
with gr.Blocks(theme=seafoam, css=custom_css) as demo:
    # Page headings followed by the logo header and the repository links.
    # NOTE(review): both headings use elem_id="title"; element ids are
    # normally expected to be unique - confirm this is intentional.
    gr.Markdown("# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit", elem_id="title")
    gr.Markdown("# Developed by IIT Jodhpur", elem_id="title")
    gr.Markdown(interface_html + links_html)

    with gr.Row():
        # Left column: inputs and action buttons
        with gr.Column():
            input_image = gr.Image(type="pil", image_mode="RGB", label="Upload Image")
            lang_dropdown = gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="auto")
            run_button = gr.Button("Run OCR")
            clear_button = gr.Button("Clear", variant="stop")  # Added Clear Button

        # Right column: annotated image and recognized text
        with gr.Column():
            output_image = gr.Image(type="pil", label="Processed Image")
            output_text = gr.Textbox(label="Recognized Text", lines=10, elem_classes="custom-textbox")

    # Examples shown separately (to avoid schema error)
    gr.Examples(
        examples=[
            ["test_images/image_88.jpg", "auto"],
            ["test_images/image_742.jpg", "hindi"],
        ],
        inputs=[input_image, lang_dropdown],
        label="Try an example",
    )

    # Connect logic
    run_button.click(
        fn=process_image,
        inputs=[input_image, lang_dropdown],
        outputs=[output_image, output_text],
    )
    # Clear logic: output order must match the tuple returned by clear_inputs
    clear_button.click(
        fn=clear_inputs,
        outputs=[input_image, lang_dropdown, output_image, output_text],
    )

# Launch
demo.launch(share=True)
# Local server launch config (disabled; kept for reference)
# if __name__ == "__main__":
#     demo.launch(
#         server_name="0.0.0.0",
#         server_port=7866,
#         share=False
#     )