# Spaces: Sleeping
import html
import re

import gradio as gr
from gradio_client import Client, handle_file
# Text produced by the most recent OCR run. ocr_extraction() writes it and
# search_keyword() reads it.
# NOTE(review): this is module-level state, so it is presumably shared by
# every session this process serves -- confirm that is acceptable.
extracted_text = ""

# One remote Hugging Face Space client per OCR backend offered in the UI.
surya_ocr_client = Client("artificialguybr/Surya-OCR")
got_ocr_client = Client("stepfun-ai/GOT_official_online_demo")
# Matches a ``text=`` field quoted in either style and captures its content.
# Assumes the Surya response stringifies each recognised line the way a Python
# repr does (``text='...'``); such a repr switches to double quotes when the
# text contains an apostrophe, so a single-quote-only pattern would silently
# drop those lines. A backslash escape is consumed as a pair so that an
# escaped quote does not end the match early; escapes are left as-is in the
# captured text.
_SURYA_TEXT_PATTERN = re.compile(r"""text=(['"])((?:\\.|(?!\1).)*)\1""")


def ocr_extraction(image, ocr_model):
    """Run the selected OCR backend on an image and cache the recognised text.

    Args:
        image: File path of the uploaded image, or None when nothing has
            been uploaded.
        ocr_model: Name of the backend to use, "Surya OCR" or "GOT OCR".

    Returns:
        The extracted text on success. Otherwise a human-readable message:
        a prompt to upload an image, a notice that the model name is not
        recognised, or "An error occurred: ..." carrying the backend error.

    Side effects:
        Rebinds the module-level ``extracted_text``. It is cleared at the
        start of every call and only set again when extraction succeeds, so
        a failed run can never leave the previous image's text behind for
        search_keyword() to highlight.
    """
    global extracted_text
    # Invalidate the previous result up front; every early return below then
    # leaves the cache empty instead of stale.
    extracted_text = ""

    if image is None:
        return "Please upload an image first."

    try:
        if ocr_model == "Surya OCR":
            result = surya_ocr_client.predict(
                image=handle_file(image),
                langs="en",
                api_name="/ocr_workflow",
            )
            # The response is scanned as one string; each recognised line
            # becomes one line of output.
            matches = _SURYA_TEXT_PATTERN.findall(str(result))
            text = "\n".join(content for _quote, content in matches)
        elif ocr_model == "GOT OCR":
            result = got_ocr_client.predict(
                image=handle_file(image),
                got_mode="plain texts OCR",
                fine_grained_mode="box",
                ocr_color="red",
                ocr_box="Hello!!",
                api_name="/run_GOT",
            )
            # Only the first element of the response is used as the text.
            text = result[0]
        else:
            return "Invalid OCR model selected."
    except Exception as e:
        # UI boundary: report any backend or network failure as text in the
        # output box rather than letting the handler raise.
        return f"An error occurred: {str(e)}"

    extracted_text = text
    return extracted_text
def search_keyword(keyword):
    """Return the cached OCR text as HTML with matches of ``keyword`` highlighted.

    The result is rendered by an HTML component, and the OCR text comes from
    an arbitrary image, so every piece of it is HTML-escaped before being
    returned; only the highlight ``<span>`` wrappers are emitted as markup.

    Args:
        keyword: Text to look for. Matching is literal (regex metacharacters
            are escaped) and case-insensitive. An empty value returns the
            escaped text with no highlighting.

    Returns:
        An HTML string, or a plain notice when no OCR text is cached yet.
    """
    # Read-only access to the module-level cache; no ``global`` needed.
    if not extracted_text:
        return "No OCR text found. Please extract text from an image first."
    if not keyword:
        return html.escape(extracted_text, quote=False)

    # Splitting on a capturing group keeps the matches in the result:
    # even indexes are the text between matches, odd indexes are the matches.
    # Escaping each piece separately keeps the match boundaries exact even
    # when the keyword itself contains "<" or "&".
    pieces = re.split(
        f"({re.escape(keyword)})", extracted_text, flags=re.IGNORECASE
    )
    rendered = []
    for index, piece in enumerate(pieces):
        safe = html.escape(piece, quote=False)
        if index % 2:
            safe = f'<span style="background-color: yellow;">{safe}</span>'
        rendered.append(safe)
    return "".join(rendered)
# Page layout: the top row holds the OCR controls and the raw extracted text,
# the bottom row holds the keyword search and the highlighted view.
with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
    gr.Markdown("# 📷 OCR Text Extraction and Direct Keyword Search 🔍")

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands the handler a path on disk, which is
            # what ocr_extraction() passes to handle_file().
            image_input = gr.Image(type="filepath", label="Upload Image")
            ocr_model_dropdown = gr.Dropdown(
                choices=["Surya OCR", "GOT OCR"],
                value="Surya OCR",
                label="Select OCR Model",
            )
            ocr_button = gr.Button("Extract Text", variant="primary")
        with gr.Column(scale=2):
            extracted_text_output = gr.Textbox(
                label="Extracted Text",
                placeholder="Text extracted from the image will appear here.",
                lines=10,
            )

    with gr.Row():
        with gr.Column(scale=1):
            keyword_input = gr.Textbox(label="Enter keyword to search")
            search_button = gr.Button("Search Keyword", variant="secondary")
        with gr.Column(scale=2):
            highlighted_output = gr.HTML(label="Highlighted Text")

    # Event wiring is registered while the Blocks context is still open.
    ocr_button.click(
        fn=ocr_extraction,
        inputs=[image_input, ocr_model_dropdown],
        outputs=extracted_text_output,
    )
    # search_keyword() takes only the keyword; the text it searches is the
    # module-level cache filled by the last successful extraction.
    search_button.click(
        fn=search_keyword,
        inputs=[keyword_input],
        outputs=highlighted_output,
    )

gr_interface.launch(share=True)