File size: 5,083 Bytes
e2f99d5
 
c014c84
e2f99d5
 
 
75b1563
e2f99d5
83c2afb
 
e2f99d5
83c2afb
e2f99d5
 
 
 
 
 
 
 
83c2afb
e2f99d5
 
 
 
 
 
c014c84
 
 
83c2afb
 
c014c84
e2f99d5
 
 
 
c014c84
 
e2f99d5
c014c84
e2f99d5
 
c014c84
e2f99d5
83c2afb
 
 
e2f99d5
83c2afb
e2f99d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85f0ffb
e2f99d5
 
 
 
 
 
 
 
c014c84
e2f99d5
 
 
 
c014c84
 
 
 
 
 
 
e2f99d5
 
 
 
 
c014c84
 
 
 
 
 
0edeb0a
c014c84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2a45a4
 
 
 
 
 
 
c014c84
 
 
 
 
 
 
505764d
c014c84
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import gradio as gr
from PIL import Image
import tempfile
import os
from IndicPhotoOCR.ocr import OCR  # Ensure OCR class is saved in a file named ocr.py
from IndicPhotoOCR.theme import Seafoam
from IndicPhotoOCR.utils.helper import detect_para

# Choices offered for ``identifier_lang`` (the script-identifier setting handed
# to the OCR pipeline). Extend this list as further options become available.
VALID_IDENTIFIER_LANGS = [
    "hindi",
    "assamese",
    "bengali",
    "gujarati",
    "kannada",
    "malayalam",
    "odia",
    "punjabi",
    "tamil",
    "telugu",
    "auto",
]

def process_image(image, identifier_lang):
    """
    Run text detection and recognition on an uploaded image.

    The image is written to a temporary working directory, passed through the
    OCR pipeline (detect -> visualize detections -> recognize), and the
    working directory is removed again before returning, so no temporary
    files are left behind between requests.

    Parameters:
    image (PIL.Image or None): The input image to be processed. ``None``
        (e.g. "Run OCR" clicked before an image was uploaded) yields an
        empty result instead of an error.
    identifier_lang (str): The script identifier model to use; forwarded
        unchanged to ``OCR(identifier_lang=...)``.

    Returns:
    tuple: ``(annotated_image, recognized_text)`` where ``annotated_image`` is
        a PIL.Image with the detected bounding boxes drawn (or ``None`` when
        no input image was given) and ``recognized_text`` is a string with one
        recognized line per text line, words separated by single spaces.
    """
    # Nothing to process: return empty outputs rather than crashing on
    # ``None.save``.
    if image is None:
        return None, ""

    # The input is stored as JPEG, which cannot hold alpha or palette data;
    # normalise such images so the save below does not fail.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    # A temporary directory guarantees that both the input copy and the
    # annotated output are deleted when the block exits, even on errors, and
    # avoids writing to a path whose handle is still open.
    with tempfile.TemporaryDirectory() as work_dir:
        image_path = os.path.join(work_dir, "input.jpg")
        output_path = os.path.join(work_dir, "annotated.png")
        image.save(image_path)

        # Initialize OCR with the selected identifier language
        ocr = OCR(identifier_lang=identifier_lang, verbose=False)

        # Detect bounding boxes, then draw them onto a copy saved at output_path
        detections = ocr.detect(image_path)
        ocr.visualize_detection(image_path, detections, save_path=output_path)

        # Image.open is lazy; copy the pixels into memory and close the file
        # so the result stays usable after the directory is removed.
        with Image.open(output_path) as annotated:
            output_image = annotated.copy()

        # Recognize text from the detected areas.
        # NOTE(review): assumes ocr.ocr returns an iterable of lines, each an
        # iterable of word strings -- that is what the join below requires.
        recognized_lines = ocr.ocr(image_path)

    recognized_text = '\n'.join(' '.join(line) for line in recognized_lines)

    return output_image, recognized_text

# Header markup rendered at the top of the page: the IITJ logo (on a white
# background, left side) and the Bhashini logo (floated right), both 100x100 px.
interface_html = """
<div style="text-align: left; padding: 10px;">
    <div style="background-color: white; padding: 10px; display: inline-block;">
        <img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
    </div>
    <img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
</div>
"""



# Centered text links to the project's GitHub repository and its dataset
# repository; both open in a new tab (target="_blank").
links_html = """
<div style="text-align: center; padding-top: 20px;">
    <a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
        GitHub Repository
    </a>
    <a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
        Dataset Repository
    </a>
</div>
"""

# Custom CSS passed to gr.Blocks:
#   - .custom-textbox textarea: enlarges the font of text areas inside
#     components carrying the "custom-textbox" class.
#   - #title: centers and emphasizes elements whose elem_id is "title".
custom_css = """
.custom-textbox textarea {
    font-size: 20px !important;
}

#title {
    text-align: center;
    font-size: 28px;
    font-weight: bold;
    margin-bottom: 20px;
}
"""

# Create an instance of the Seafoam theme (imported from IndicPhotoOCR.theme)
# for a consistent visual style; it is handed to gr.Blocks as ``theme``.
seafoam = Seafoam()

# Clear function
def clear_inputs():
    """Return the reset values used by the "Clear" button.

    The four values map, in order, onto the components listed as outputs of
    the clear handler: input image, language dropdown, output image and
    output text.

    Returns:
    tuple: ``(None, "auto", None, "")``.
    """
    blank_image = None
    default_lang = "auto"
    blank_text = ""
    return (blank_image, default_lang, blank_image, blank_text)

# Build the Gradio Blocks UI: a header, an input column (image, language
# selector, action buttons) next to an output column (annotated image and
# recognized text), followed by clickable examples.
with gr.Blocks(theme=seafoam, css=custom_css) as demo:

    # --- Page header: title, attribution, logos and repository links ---
    gr.Markdown(
        "# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
        elem_id="title",
    )
    gr.Markdown(
        "# Developed by IIT Jodhpur",
        elem_id="title",
    )
    gr.Markdown(interface_html + links_html)

    with gr.Row():
        # Left column: user inputs and action buttons
        with gr.Column():
            input_image = gr.Image(
                type="pil",
                image_mode="RGB",
                label="Upload Image",
            )
            lang_dropdown = gr.Dropdown(
                VALID_IDENTIFIER_LANGS,
                label="Identifier Language",
                value="auto",
            )
            run_button = gr.Button("Run OCR")
            clear_button = gr.Button("Clear", variant="stop")

        # Right column: OCR results
        with gr.Column():
            output_image = gr.Image(
                type="pil",
                label="Processed Image",
            )
            output_text = gr.Textbox(
                label="Recognized Text",
                lines=10,
                elem_classes="custom-textbox",
            )

    # Examples are declared outside the row above (per the original note,
    # this placement avoids a schema error).
    gr.Examples(
        examples=[
            ["test_images/image_88.jpg", "auto"],
            ["test_images/image_742.jpg", "hindi"],
        ],
        inputs=[input_image, lang_dropdown],
        label="Try an example",
    )

    # --- Event wiring ---
    # "Run OCR": feed the image and language into process_image and show the
    # annotated image and recognized text.
    run_button.click(
        fn=process_image,
        inputs=[input_image, lang_dropdown],
        outputs=[output_image, output_text],
    )
    # "Clear": reset all four components to the values from clear_inputs.
    clear_button.click(
        fn=clear_inputs,
        outputs=[input_image, lang_dropdown, output_image, output_text],
    )


# Start the app as soon as the module is executed, with share=True.
demo.launch(share=True)

# # 👇 Local server launch config
# if __name__ == "__main__":
#     demo.launch(
#         server_name="0.0.0.0",
#         server_port=7866,
#         share=False
#     )