Spaces:

Tonic
/

GOT-OCR

Running

App Files Community

Tonic committed on Sep 14, 2024

Commit

ee4b3d0

unverified ·

1 Parent(s): 02ff46f

add html and markdown outputs, refactor the interface, add outputs

Browse files

Files changed (2) hide show

app.py +65 -100
globe.py +55 -0

app.py CHANGED Viewed

@@ -8,40 +8,9 @@ import io
 from PIL import Image
 import numpy as np
 import yaml
-import markdown
 from pathlib import Path
-# Function to extract title and description from the markdown file
-def extract_title_description(md_file_path):
     # NOTE(review): every line of this helper and of the module-level call
     # below carries the "-" diff marker, i.e. this commit deletes it.
     # Returns (title, description): the frontmatter 'title' value and the
     # remaining markdown body passed through markdown.markdown().
-    with open(md_file_path, 'r') as f:
-        lines = f.readlines()
-    # Extract frontmatter (YAML) for title
-    frontmatter = []
-    content_start = 0
     # Frontmatter is only collected when the very first line is '---';
     # an empty file makes lines[0] raise IndexError.
-    if lines[0].strip() == '---':
-        for idx, line in enumerate(lines[1:], 1):
-            if line.strip() == '---':
                 # Closing delimiter: the body starts on the following line.
-                content_start = idx + 1
-                break
-            frontmatter.append(line)
     # NOTE(review): without frontmatter this parses an empty string;
     # presumably yaml.safe_load then yields None and .get() fails -- confirm.
-    frontmatter_yaml = yaml.safe_load(''.join(frontmatter))
-    title = frontmatter_yaml.get('title', 'Title Not Found')
-    # Extract content (description)
-    description_md = ''.join(lines[content_start:])
-    description = markdown.markdown(description_md)
-    return title, description
-# Path to the markdown file
-md_file_path = 'content/index.md'
-# Extract title and description from the markdown file
     # Runs at import time, so the file is read whenever the module is loaded.
-title, description = extract_title_description(md_file_path)
-# Rest of the script continues as before
 model_name = 'ucaslcl/GOT-OCR2_0'
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
@@ -55,114 +24,110 @@ def image_to_base64(image):
     image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode()
 # Pre-commit version of process_image; lines marked "-" are the ones this
 # commit replaces.
 @spaces.GPU
-def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None, render=False):
     # Dispatches on the task label and returns (text, html). Only
     # "Render Formatted OCR" produces HTML; every other branch ends at the
     # final `return res, None`.
     # NOTE: the `render` parameter is never read in this body.
     # NOTE: a label matching no branch leaves `res` unassigned, so the final
     # return raises UnboundLocalError.
     if task == "Plain Text OCR":
         res = model.chat(tokenizer, image, ocr_type='ocr')
     elif task == "Format Text OCR":
-        res = model.chat(tokenizer, image, ocr_type='format')
     elif task == "Fine-grained OCR (Box)":
-        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box)
     elif task == "Fine-grained OCR (Color)":
-        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color)
     elif task == "Multi-crop OCR":
-        res = model.chat_crop(tokenizer, image_file=image)
     elif task == "Render Formatted OCR":
         # The model is asked to save its render to ./demo.html, which is then
         # read back and returned as the second tuple element.
-        res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file='./demo.html')
-        with open('./demo.html', 'r') as f:
-            html_content = f.read()
-        return res, html_content
-    return res, None
 def update_inputs(task):
     # Pre-commit version: returns four gr.update(...) values, one per widget
     # wired as an output of task_dropdown.change (OCR type, OCR box,
     # OCR color, render checkbox). Lines marked "-" are removed by this commit.
     # NOTE: a label matching no branch falls off the end and returns None.
-    if task == "Plain Text OCR" or task == "Format Text OCR" or task == "Multi-crop OCR":
-        return [gr.update(visible=False)] * 4
     elif task == "Fine-grained OCR (Box)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=True),
             gr.update(visible=False),
-            gr.update(visible=False)
         ]
     elif task == "Fine-grained OCR (Color)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=False),
             gr.update(visible=True, choices=["red", "green", "blue"]),
-            gr.update(visible=False)
         ]
     # Only this task reveals the fourth widget (the render checkbox).
-    elif task == "Render Formatted OCR":
-        return [gr.update(visible=False)] * 3 + [gr.update(visible=True)]
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     # Pre-commit version: a thin wrapper that forwards the UI values to
     # process_image and returns its (text, html) pair; a falsy html_content
     # is normalised to None.
     res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
     if html_content:
-        return res, html_content
     return res, None
 # Pre-commit UI definition; lines marked "-" are the layout this commit
 # removes (a two-column row: description on the left, app on the right).
 import gradio as gr
 with gr.Blocks() as demo:
-    with gr.Row():
-        # Left Column: Description
-        with gr.Column(scale=1):
-            gr.Markdown(f"# {title}")
-            gr.Markdown(description)
-        # Right Column: App Inputs and Outputs
-        with gr.Column(scale=3):
-            image_input = gr.Image(type="filepath", label="Input Image")
-            task_dropdown = gr.Dropdown(
-                choices=[
-                    "Plain Text OCR",
-                    "Format Text OCR",
-                    "Fine-grained OCR (Box)",
-                    "Fine-grained OCR (Color)",
-                    "Multi-crop OCR",
-                    "Render Formatted OCR"
-                ],
-                label="Select Task",
-                value="Plain Text OCR"
-            )
-            ocr_type_dropdown = gr.Dropdown(
-                choices=["ocr", "format"],
-                label="OCR Type",
-                visible=False
-            )
-            ocr_box_input = gr.Textbox(
-                label="OCR Box (x1,y1,x2,y2)",
-                placeholder="e.g., 100,100,200,200",
-                visible=False
-            )
-            ocr_color_dropdown = gr.Dropdown(
-                choices=["red", "green", "blue"],
-                label="OCR Color",
-                visible=False
-            )
             # NOTE: render_checkbox is an output of task_dropdown.change but is
             # not listed in submit_button.click's inputs, so its value is
             # never passed to ocr_demo.
-            render_checkbox = gr.Checkbox(
-                label="Render Result",
-                visible=False
-            )
-            submit_button = gr.Button("Process")
-            # OCR Result below the Submit button
-            output_text = gr.Textbox(label="OCR Result")
-            output_html = gr.HTML(label="Rendered HTML Output")
-    # Update inputs dynamically based on task selection
     task_dropdown.change(
         update_inputs,
         inputs=[task_dropdown],
-        outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, render_checkbox]
     )
-    # Process OCR on button click
     submit_button.click(
         ocr_demo,
         inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
-        outputs=[output_text, output_html]
     )
 if __name__ == "__main__":
-    demo.launch()

 from PIL import Image
 import numpy as np
 import yaml
 from pathlib import Path
+from globe import title, description, modelinfor, joinus
 model_name = 'ucaslcl/GOT-OCR2_0'
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
     image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode()
+html_file = './demo.html'
 @spaces.GPU
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     """Run one OCR task on ``image`` and return ``(text, rendered_html)``.

     Args:
         image: Image handed straight to the model (the UI passes a file path).
         task: One of the labels offered by the task dropdown.
         ocr_type: OCR mode forwarded to the model for the fine-grained tasks.
         ocr_box: Box specification forwarded for "Fine-grained OCR (Box)".
         ocr_color: Color name forwarded for "Fine-grained OCR (Color)".

     Returns:
         ``(res, html_content)``. ``html_content`` is ``None`` for
         "Plain Text OCR" and whenever the model call left no render file
         at ``html_file``.

     Raises:
         ValueError: If ``task`` is not one of the supported labels.
     """
     if task == "Plain Text OCR":
         # Plain OCR never renders, so there is no HTML to read back.
         return model.chat(tokenizer, image, ocr_type='ocr'), None

     render_path = Path(html_file)
     # html_file is one fixed path: remove whatever an earlier request left
     # there so a call that writes nothing cannot return stale HTML.
     # NOTE(review): concurrent requests still share this path.
     try:
         render_path.unlink()
     except FileNotFoundError:
         pass

     if task == "Format Text OCR":
         res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
     elif task == "Fine-grained OCR (Box)":
         res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=html_file)
     elif task == "Fine-grained OCR (Color)":
         res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=html_file)
     elif task == "Multi-crop OCR":
         res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
     elif task == "Render Formatted OCR":
         res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
     else:
         # Fail with a clear message instead of an unbound `res` further down.
         raise ValueError(f"Unsupported task: {task!r}")

     if not render_path.is_file():
         # Presumably the model skips rendering for some modes (e.g. a
         # fine-grained call with ocr_type="ocr") -- TODO confirm.
         return res, None
     with open(html_file, 'r') as f:
         html_content = f.read()
     return res, html_content
 def update_inputs(task):
     """Return visibility updates for the three task-specific option widgets.

     The list order matches the ``outputs`` wired to ``task_dropdown.change``:
     OCR type dropdown, OCR box textbox, OCR color dropdown.

     Args:
         task: Label currently selected in the task dropdown.

     Returns:
         A list of three ``gr.update(...)`` values. Only the two fine-grained
         tasks reveal any widget; every other label hides all three.
     """
     if task == "Fine-grained OCR (Box)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=True),
             gr.update(visible=False),
         ]
     if task == "Fine-grained OCR (Color)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=False),
             gr.update(visible=True, choices=["red", "green", "blue"]),
         ]
     # Catch-all rather than an allow-list of labels: an allow-list returns
     # None for any label it does not name, which cannot fill three outputs.
     return [gr.update(visible=False)] * 3
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     """Run OCR for the UI and shape both outputs for display.

     Args:
         image: Value of the image input component.
         task: Selected task label.
         ocr_type: Selected OCR type (used by the fine-grained tasks).
         ocr_box: Box text entered by the user.
         ocr_color: Selected color name.

     Returns:
         ``(markdown_text, iframe_html)``. ``iframe_html`` is ``None`` when
         ``process_image`` produced no rendered HTML.
     """
     # Function-scope import: stdlib only, keeps this block self-contained.
     import html

     res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
     # Wrap the text in $...$ for the Markdown output, then undo the wrapping
     # around tabular environments and drop inline \( \) delimiters.
     res = f"${res}$"
     res = res.replace("$\\begin{tabular}", "\\begin{tabular}")
     res = res.replace("\\end{tabular}$", "\\end{tabular}")
     res = res.replace("\\(", "")
     res = res.replace("\\)", "")
     if html_content:
         # srcdoc is a double-quoted attribute: an unescaped '"' or '&' in the
         # rendered document would end the attribute early and break the
         # iframe. The browser un-escapes the value before parsing it.
         srcdoc = html.escape(html_content, quote=True)
         html_string = f'<iframe srcdoc="{srcdoc}" width="100%" height="600px"></iframe>'
         return res, html_string
     return res, None
 # Builds the Gradio Blocks UI: intro Markdown, one column holding the inputs
 # and both outputs, then the model-information footer.
 import gradio as gr
 with gr.Blocks() as demo:
     # title, description and joinus are Markdown strings imported from globe.
+    gr.Markdown(title)
+    gr.Markdown(description)
+    gr.Markdown(joinus)
+    with gr.Column():
         # type="filepath": the handlers receive the image as a file path.
+        image_input = gr.Image(type="filepath", label="Input Image")
         # NOTE(review): these labels are the strings process_image and
         # update_inputs compare against; globe.tasks holds the same list.
+        task_dropdown = gr.Dropdown(
+            choices=[
+                "Plain Text OCR",
+                "Format Text OCR",
+                "Fine-grained OCR (Box)",
+                "Fine-grained OCR (Color)",
+                "Multi-crop OCR",
+                "Render Formatted OCR"
+            ],
+            label="Select Task",
+            value="Plain Text OCR"
+        )
         # The three option widgets below start hidden; update_inputs returns
         # the visibility updates applied to them when the task changes.
+        ocr_type_dropdown = gr.Dropdown(
+            choices=["ocr", "format"],
+            label="OCR Type",
+            visible=False
+        )
+        ocr_box_input = gr.Textbox(
+            label="OCR Box (x1,y1,x2,y2)",
+            placeholder="e.g., 100,100,200,200",
+            visible=False
+        )
+        ocr_color_dropdown = gr.Dropdown(
+            choices=["red", "green", "blue"],
+            label="OCR Color",
+            visible=False
+        )
+        submit_button = gr.Button("Process")
         # ocr_demo's first return value goes to output_markdown and its
         # second to output_html (see submit_button.click below).
+        output_markdown = gr.Markdown(label="🫴🏻📸GOT-OCR")
+        output_html = gr.HTML(label="🫴🏻📸GOT-OCR")
+    gr.Markdown(modelinfor)
     # Re-evaluate which option widgets are visible whenever the task changes.
     task_dropdown.change(
         update_inputs,
         inputs=[task_dropdown],
+        outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown]
     )
     # Run OCR when the Process button is clicked.
     submit_button.click(
         ocr_demo,
         inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
+        outputs=[output_markdown, output_html]
     )
 if __name__ == "__main__":
+    demo.launch()

globe.py ADDED Viewed

	@@ -0,0 +1,55 @@

+title = """# 🙋🏻‍♂️Welcome to Tonic's🫴🏻📸GOT-OCR
+---
+"""
 # title (above) is the page heading as a Markdown H1 followed by a rule.
 # app.py imports title, description, modelinfor and joinus from this module
 # and passes each one to gr.Markdown.

 # description: feature overview and usage steps shown under the title.
+description = """
+The **GOT-OCR model** is a cutting-edge OCR system with **580M parameters**, designed to process a wide range of "characters." Equipped with a **high-compression encoder** and a **long-context decoder**, it excels in both scene and document-style images. The model supports **multi-page** and **dynamic resolution OCR**, enhancing its versatility.
+### Key Features
+- **Plain Text OCR**: Extracts text from images.
+- **Formatted Text OCR**: Retains the original formatting, including tables and formulas.
+- **Fine-grained OCR**: Offers box-based and color-based OCR for precision in specific regions.
+- **Multi-crop OCR**: Handles multiple cropped sections within an image.
+## Supported Content Types
+- Plain text
+- Math/molecular formulas
+- Tables and charts
+- Sheet music
+- Geometric shapes
+## How to Use
+1. Select a task from the dropdown menu.
+2. Upload an image.
+3. (Optional) Adjust parameters based on the selected task.
+4. Click **Process** to view the results.
+"""
 # joinus: community links block rendered after the description.
+joinus = """---
+### Join us :
+🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
+"""
 # modelinfor: model name, repository link and environment, rendered as the
 # last Markdown element of the page.
+modelinfor = """---
+### Model Information
+- **Model Name**: GOT-OCR 2.0
+- **Hugging Face Repository**: [ucaslcl/GOT-OCR2_0](https://huggingface.co/ucaslcl/GOT-OCR2_0)
+- **Environment**: CUDA 11.8 + PyTorch 2.0.1
+"""
+tasks = [
     # Task labels, in the same order as the task dropdown choices in app.py.
     # NOTE(review): app.py's visible import line does not import tasks,
     # ocr_types or ocr_colors; it hard-codes the same literals instead.
+    "Plain Text OCR",
+    "Format Text OCR",
+    "Fine-grained OCR (Box)",
+    "Fine-grained OCR (Color)",
+    "Multi-crop OCR",
+    "Render Formatted OCR"
+]
 # Same values as the "OCR Type" dropdown choices in app.py.
+ocr_types = ["ocr", "format"]
 # Same values as the "OCR Color" dropdown choices in app.py.
+ocr_colors = ["red", "green", "blue"]