ARCQUB committed on
Commit
5397ce0
·
verified ·
1 Parent(s): 33ad5cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -18
app.py CHANGED
@@ -2,10 +2,20 @@ import gradio as gr
2
  import importlib
3
  from PIL import Image
4
  import json
5
- import os
6
 
7
- # === Load the GPT-4o module only
8
- from models import gpt4o_pix2struct_ocr
 
 
 
 
 
 
 
 
 
 
9
 
10
  # === Format Raw JSON Output
11
  def format_result_json(output):
@@ -128,12 +138,15 @@ def format_pretty_view(output):
128
 
129
  return "\n".join(lines).strip()
130
 
131
- # === Inference Handler (GPT-4o only)
132
- def process_image(image_file):
 
133
  image = Image.open(image_file.name).convert("RGB")
134
- result = gpt4o_pix2struct_ocr.run_model(image)
 
 
135
  parsed_json = result.get("json")
136
- raw_text = result.get("raw")
137
 
138
  if parsed_json:
139
  json_output = format_result_json(parsed_json)
@@ -144,23 +157,27 @@ def process_image(image_file):
144
 
145
  return image, json_output, pretty_output
146
 
147
- # === Gradio Interface
148
  iface = gr.Interface(
149
- fn=process_image,
150
- inputs=[gr.File(file_types=["image"], label="Upload a BPMN Diagram Image")],
 
 
 
151
  outputs=[
152
- gr.Image(label="📷 Input Image"),
153
- gr.Textbox(label="🧠 Raw JSON Output", lines=20),
154
- gr.Textbox(label="📋 Prettified View", lines=25)
155
  ],
156
- title="🧩 BPMN Extractor using GPT-4o + OCR",
157
- description="Upload a BPMN diagram image. Extracts structured JSON using GPT-4o and Pix2Struct OCR. Runs on CPU-only Space.",
158
- allow_flagging="never"
159
  )
160
 
161
- # === Launch without GPU
 
162
  def main():
163
- iface.launch(ssr=False)
164
 
165
  if __name__ == "__main__":
166
  main()
 
2
  import importlib
3
  from PIL import Image
4
  import json
5
+ import spaces
6
 
7
# === Model Mapping ===
# Maps the model name offered in the UI dropdown to the dotted path of the
# module implementing it. load_model_runner() reads ``run_model`` from the
# mapped module, so every mapped module must expose that attribute.
MODEL_MAP = {
    # Disabled entries, kept for reference:
    #"Qwen": "models.qwen",
    #"Pixtral": "models.pixtral",
    #"Aya Vision": "models.aya_vision",
    "GPT-4o": "models.gpt4o"
}


# === Load Model
def load_model_runner(model_name):
    """Return the ``run_model`` callable of the module mapped to *model_name*.

    Args:
        model_name: A key of ``MODEL_MAP``.

    Returns:
        The ``run_model`` attribute of the imported module.

    Raises:
        KeyError: If *model_name* is not a key of ``MODEL_MAP`` (this
            includes ``None``, e.g. when no model was selected). The message
            lists the valid names.
        ImportError: If the mapped module cannot be imported.
        AttributeError: If the mapped module has no ``run_model`` attribute.
    """
    try:
        module_path = MODEL_MAP[model_name]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message
        # that tells the caller which names are actually accepted.
        raise KeyError(
            f"Unknown model {model_name!r}; expected one of {sorted(MODEL_MAP)}"
        ) from None
    # import_module reuses an already-imported module from sys.modules,
    # so repeated calls do not re-execute the model module.
    module = importlib.import_module(module_path)
    return module.run_model
19
 
20
  # === Format Raw JSON Output
21
  def format_result_json(output):
 
138
 
139
  return "\n".join(lines).strip()
140
 
141
+ # === Main Inference Handler
142
+ def process_single_image(model_name, image_file):
143
+ runner = load_model_runner(model_name)
144
  image = Image.open(image_file.name).convert("RGB")
145
+
146
+ result = runner(image)
147
+
148
  parsed_json = result.get("json")
149
+ raw_text = result.get("raw", "")
150
 
151
  if parsed_json:
152
  json_output = format_result_json(parsed_json)
 
157
 
158
  return image, json_output, pretty_output
159
 
160
# === Gradio UI
# One-function interface around process_single_image: the two inputs are
# passed to it as (model_name, image_file), and the three values it returns
# fill the image, raw-JSON and prettified outputs in that order.
iface = gr.Interface(
    title="🖼️ Vision Model Extractor - JSON + Pretty View",
    description="Upload a BPMN image and select a vision model to extract structured output. GPT-4o uses an API key from your Hugging Face Space Secret.",
    fn=process_single_image,
    inputs=[
        # Choices are the keys of MODEL_MAP, in insertion order.
        gr.Dropdown(choices=list(MODEL_MAP), label="Select Vision Model"),
        gr.File(file_types=["image"], label="Upload a BPMN Image"),
    ],
    outputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
        gr.Textbox(label="Prettified View (User-Friendly)", lines=25),
    ],
    flagging_mode="never",
)
176
 
177
+ # === Launch the app (the @spaces.GPU decorator below is commented out, so GPU mode is not enabled)
178
+ #@spaces.GPU
179
def main():
    """Launch the module-level ``iface`` Gradio interface with default settings."""
    iface.launch()


# Only start the server when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()