hardiksharma6555 committed on
Commit
7b88c1b
·
verified ·
1 Parent(s): cede35d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +6 -18
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from gradio_client import Client, handle_file
3
  import re
4
- from thefuzz import fuzz
5
 
6
  # hugging face clients for both OCR options
7
  surya_ocr_client = Client("artificialguybr/Surya-OCR")
@@ -30,7 +29,7 @@ def ocr_extraction(image, ocr_model):
30
  result = client.predict(
31
  image=handle_file(image),
32
  got_mode="plain texts OCR",
33
- fine_grained_mode="box",
34
  ocr_color="red",
35
  ocr_box="Hello!!",
36
  api_name="/run_GOT"
@@ -43,29 +42,19 @@ def ocr_extraction(image, ocr_model):
43
  except Exception as e:
44
  return f"An error occurred: {str(e)}"
45
 
46
- def search_keyword(keyword, search_type):
47
  global extracted_text
48
  if not extracted_text:
49
  return "No OCR text found. Please extract text from an image first."
50
  if not keyword:
51
  return extracted_text
52
 
53
- if search_type == "Direct Search":
54
- highlighted_text = re.sub(f"({re.escape(keyword)})", r'<span style="background-color: yellow;">\1</span>', extracted_text, flags=re.IGNORECASE)
55
- else: # Nearest Search
56
- words = extracted_text.split()
57
- highlighted_words = []
58
- for word in words:
59
- if fuzz.ratio(word.lower(), keyword.lower()) >= 80: # Adjust threshold as needed
60
- highlighted_words.append(f'<span style="background-color: yellow;">{word}</span>')
61
- else:
62
- highlighted_words.append(word)
63
- highlighted_text = " ".join(highlighted_words)
64
 
65
  return highlighted_text
66
 
67
  with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
68
- gr.Markdown("# 📷 OCR Text Extraction and Advanced Keyword Search 🔍")
69
 
70
  with gr.Row():
71
  with gr.Column(scale=1):
@@ -87,7 +76,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
87
  with gr.Row():
88
  with gr.Column(scale=1):
89
  keyword_input = gr.Textbox(label="Enter keyword to search")
90
- search_type = gr.Radio(["Direct Search", "Nearest Search"], label="Search Type", value="Direct Search")
91
  search_button = gr.Button("Search Keyword", variant="secondary")
92
 
93
  with gr.Column(scale=2):
@@ -101,8 +89,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
101
 
102
  search_button.click(
103
  fn=search_keyword,
104
- inputs=[keyword_input, search_type],
105
  outputs=highlighted_output
106
  )
107
 
108
- gr_interface.launch(share=True)
 
1
  import gradio as gr
2
  from gradio_client import Client, handle_file
3
  import re
 
4
 
5
  # hugging face clients for both OCR options
6
  surya_ocr_client = Client("artificialguybr/Surya-OCR")
 
29
  result = client.predict(
30
  image=handle_file(image),
31
  got_mode="plain texts OCR",
32
+ fine_grained_mode="box",
33
  ocr_color="red",
34
  ocr_box="Hello!!",
35
  api_name="/run_GOT"
 
42
  except Exception as e:
43
  return f"An error occurred: {str(e)}"
44
 
45
+ def search_keyword(keyword):
46
  global extracted_text
47
  if not extracted_text:
48
  return "No OCR text found. Please extract text from an image first."
49
  if not keyword:
50
  return extracted_text
51
 
52
+ highlighted_text = re.sub(f"({re.escape(keyword)})", r'<span style="background-color: yellow;">\1</span>', extracted_text, flags=re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
53
 
54
  return highlighted_text
55
 
56
  with gr.Blocks(theme=gr.themes.Soft()) as gr_interface:
57
+ gr.Markdown("# 📷 OCR Text Extraction and Direct Keyword Search 🔍")
58
 
59
  with gr.Row():
60
  with gr.Column(scale=1):
 
76
  with gr.Row():
77
  with gr.Column(scale=1):
78
  keyword_input = gr.Textbox(label="Enter keyword to search")
 
79
  search_button = gr.Button("Search Keyword", variant="secondary")
80
 
81
  with gr.Column(scale=2):
 
89
 
90
  search_button.click(
91
  fn=search_keyword,
92
+ inputs=[keyword_input],
93
  outputs=highlighted_output
94
  )
95
 
96
+ gr_interface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ thefuzz