protae5544 committed on
Commit
04ed58c
·
verified ·
1 Parent(s): e8f7a5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -143
app.py CHANGED
@@ -2,23 +2,16 @@ import base64
2
  from io import BytesIO
3
  import json
4
  import os
5
- import PyPDF2 # เพิ่มไลบรารีสำหรับอ่าน PDF
6
  from openai import OpenAI
7
  from dotenv import load_dotenv
8
  from typhoon_ocr import prepare_ocr_messages
9
  import gradio as gr
10
  from PIL import Image
11
 
12
- # โหลด environment variables
13
  load_dotenv()
14
 
15
- # ตั้งค่า OpenAI API
16
- openai = OpenAI(
17
- base_url=os.getenv("TYPHOON_BASE_URL"),
18
- api_key=os.getenv("TYPHOON_API_KEY")
19
- )
20
 
21
- # ตั้งค่า Theme (เดิม)
22
  theme = gr.themes.Soft(
23
  primary_hue=gr.themes.Color(
24
  c50="#f7f7fd",
@@ -37,131 +30,54 @@ theme = gr.themes.Soft(
37
  neutral_hue="stone",
38
  )
39
 
40
- # ตัวแปรสำหรับบันทึกผลลัพธ์
41
- OUTPUT_FILE = "ocr_results.txt"
42
-
43
- def save_ocr_result(text):
44
- """บันทึกผลลัพธ์ OCR แบบต่อเนื่องในไฟล์เดียว"""
45
- with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
46
- f.write(text + "\n\n")
47
- return OUTPUT_FILE
48
-
49
- def clear_output_file():
50
- """ล้างไฟล์ผลลัพธ์เก่า"""
51
- with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
52
- f.write("")
53
-
54
- def get_pdf_page_count(pdf_path):
55
- """หาจำนวนหน้าของ PDF"""
56
- with open(pdf_path, 'rb') as f:
57
- reader = PyPDF2.PdfReader(f)
58
- return len(reader.pages)
59
- return 0
60
-
61
- def process_pdf(pdf_or_image_file, task_type):
62
  if pdf_or_image_file is None:
63
  return None, "No file uploaded"
64
 
65
  orig_filename = pdf_or_image_file.name
66
- combined_text = ""
67
- image_pil = None # ใช้เก็บภาพหน้าแรกของ PDF
68
 
69
  try:
70
- # ตรวจสอบว่าเป็น PDF หรือไม่
71
- if orig_filename.lower().endswith(".pdf"):
72
- total_pages = get_pdf_page_count(orig_filename)
73
-
74
- if total_pages == 0:
75
- return None, "ไม่สามารถอ่านจำนวนหน้าของ PDF ได้"
76
-
77
- # ประมวลผลทุกหน้า
78
- for page_num in range(1, total_pages + 1):
79
- # เตรียมข้อมูลสำหรับ OCR
80
- messages = prepare_ocr_messages(
81
- pdf_or_image_path=orig_filename,
82
- task_type=task_type,
83
- target_image_dim=1800,
84
- target_text_length=8000,
85
- page_num=page_num
86
- )
87
-
88
- # ดึงภาพหน้าแรก
89
- if page_num == 1:
90
- image_url = messages[0]["content"][1]["image_url"]["url"]
91
- image_base64 = image_url.replace("data:image/png;base64,", "")
92
- image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
93
-
94
- # ส่งไป API
95
- response = openai.chat.completions.create(
96
- model=os.getenv("TYPHOON_OCR_MODEL"),
97
- messages=messages,
98
- max_tokens=16384,
99
- extra_body={
100
- "repetition_penalty": 1.2,
101
- "temperature": 0.1,
102
- "top_p": 0.6,
103
- },
104
- )
105
- text_output = response.choices[0].message.content
106
-
107
- # ดึง natural_text
108
- try:
109
- json_data = json.loads(text_output)
110
- markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
111
- except Exception as e:
112
- markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
113
-
114
- # รวมผลลัพธ์ทุกหน้า
115
- combined_text += f"[Page {page_num}]\n{markdown_out}\n\n"
116
-
117
- # บันทึกผลลัพธ์ทั้งหมดลงไฟล์
118
- save_ocr_result(combined_text)
119
- return image_pil, combined_text, gr.File.update(value=OUTPUT_FILE)
120
 
121
- # หากเป็นไฟล์ภาพ
122
- else:
123
- # ประมวลผลหน้าเดียว
124
- messages = prepare_ocr_messages(
125
- pdf_or_image_path=orig_filename,
126
- task_type=task_type,
127
- target_image_dim=1800,
128
- target_text_length=8000,
129
- page_num=1
130
- )
131
-
132
- # ดึงภาพ
133
- image_url = messages[0]["content"][1]["image_url"]["url"]
134
- image_base64 = image_url.replace("data:image/png;base64,", "")
135
- image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
136
-
137
- # ส่งไป API
138
- response = openai.chat.completions.create(
139
- model=os.getenv("TYPHOON_OCR_MODEL"),
140
- messages=messages,
141
- max_tokens=16384,
142
- extra_body={
143
- "repetition_penalty": 1.2,
144
- "temperature": 0.1,
145
- "top_p": 0.6,
146
- },
147
- )
148
- text_output = response.choices[0].message.content
149
-
150
- # ดึง natural_text
151
- try:
152
- json_data = json.loads(text_output)
153
- markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
154
- except Exception as e:
155
- markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
156
-
157
- # บันทึกผลลัพธ์ลงไฟล์
158
- save_ocr_result(markdown_out)
159
- return image_pil, markdown_out, gr.File.update(value=OUTPUT_FILE)
160
 
161
  except Exception as e:
162
- return None, f"Error processing file: {str(e)}", None
 
163
 
164
- # สร้าง UI
165
  with gr.Blocks(theme=theme) as demo:
166
  title = gr.HTML("""
167
  <h1>Typhoon OCR</h1>
@@ -171,7 +87,7 @@ with gr.Blocks(theme=theme) as demo:
171
  <br />
172
  <details>
173
  <summary><strong>Disclaimer</strong></summary>
174
- The responses generated by this AI system are autonomously constructed and do not necessarily reflect the views or positions of the developing organizations, their affiliates, or any of their employees. These AI-generated responses do not represent those of the organizations. The organizations do not endorse, support, sanction, encourage, verify, or agree with the comments, opinions, or statements generated by this AI. The information produced by this AI is not intended to malign any religion, ethnic group, club, organization, company, individual, anyone, or anything. It is not the intent of the organizations to malign any group or individual. The AI operates based on its programming and training data and its responses should not be interpreted as the explicit intent or opinion of the organizations.
175
  </details>
176
  <br />
177
  <details>
@@ -183,14 +99,12 @@ with gr.Blocks(theme=theme) as demo:
183
  <summary><strong>License</strong></summary>
184
  This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
185
  </details>
186
- """)
187
-
188
  with gr.Row():
189
  with gr.Column(scale=1):
190
- # อัปโหลดไฟล์
191
  pdf_input = gr.File(label="📄 Upload Image file or PDF file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
192
 
193
- # เลือก Task
194
  with gr.Group(elem_classes=["task-background"]):
195
  task_dropdown = gr.Radio(["default", "structure"], label="🎯 Select Task", value="default")
196
  gr.HTML("""
@@ -201,6 +115,7 @@ with gr.Blocks(theme=theme) as demo:
201
  demo.css = """
202
  .task-background {
203
  background: var(--block-background-fill) !important;
 
204
  }
205
  .task-background > * {
206
  background: var(--block-background-fill) !important;
@@ -210,28 +125,19 @@ with gr.Blocks(theme=theme) as demo:
210
  font-size: 12px;
211
  }
212
  """
213
- # ปุ่มรัน
214
  run_button = gr.Button("🚀 Run")
215
-
216
- # แสดงภาพ
217
  image_output = gr.Image(label="📸 Preview Image", type="pil")
218
-
219
  with gr.Column(scale=2):
220
- # แสดงผลลัพธ์ Markdown
221
  markdown_output = gr.Markdown(label='Markdown Result', show_label=True)
222
-
223
- # ปุ่มดาวน์โหลดไฟล์
224
- download_button = gr.File(label="📥 ดาวน์โหลดผลลัพธ์ทั้งหมด (Text File)", interactive=False)
225
 
226
- # เชื่อมต่อ UI กับฟังก์ชัน
 
227
  run_button.click(
228
  fn=process_pdf,
229
- inputs=[pdf_input, task_dropdown],
230
- outputs=[image_output, markdown_output, download_button]
231
  )
232
 
233
- # เริ่มต้นใหม่ (ล้างไฟล์ผลลัพธ์เก่า)
234
- clear_output_file()
235
-
236
- # รันแอป
237
  demo.launch(share=False)
 
2
  from io import BytesIO
3
  import json
4
  import os
 
5
  from openai import OpenAI
6
  from dotenv import load_dotenv
7
  from typhoon_ocr import prepare_ocr_messages
8
  import gradio as gr
9
  from PIL import Image
10
 
 
11
  load_dotenv()
12
 
13
+ openai = OpenAI(base_url=os.getenv("TYPHOON_BASE_URL"), api_key=os.getenv("TYPHOON_API_KEY"))
 
 
 
 
14
 
 
15
  theme = gr.themes.Soft(
16
  primary_hue=gr.themes.Color(
17
  c50="#f7f7fd",
 
30
  neutral_hue="stone",
31
  )
32
 
33
+ def process_pdf(pdf_or_image_file, task_type, page_number):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  if pdf_or_image_file is None:
35
  return None, "No file uploaded"
36
 
37
  orig_filename = pdf_or_image_file.name
 
 
38
 
39
  try:
40
+ # Build the OCR request messages for the requested page (falls back to page 1 when page_number is empty or 0)
41
+ messages = prepare_ocr_messages(
42
+ pdf_or_image_path=orig_filename,
43
+ task_type=task_type,
44
+ target_image_dim=1800,
45
+ target_text_length=8000,
46
+ page_num=page_number if page_number else 1
47
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # Extract the image from the message content for display
50
+ image_url = messages[0]["content"][1]["image_url"]["url"]
51
+ image_base64 = image_url.replace("data:image/png;base64,", "")
52
+ image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
53
+
54
+ # Send messages to OpenAI compatible API
55
+ response = openai.chat.completions.create(
56
+ model=os.getenv("TYPHOON_OCR_MODEL"),
57
+ messages=messages,
58
+ max_tokens=16384,
59
+ extra_body={
60
+ "repetition_penalty": 1.2,
61
+ "temperature": 0.1,
62
+ "top_p": 0.6,
63
+ },
64
+ )
65
+ text_output = response.choices[0].message.content
66
+
67
+ # Try to parse the output as a JSON object and extract its 'natural_text' field
68
+ try:
69
+ json_data = json.loads(text_output)
70
+ markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
71
+ except Exception as e:
72
+ markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
73
+
74
+ return image_pil, markdown_out
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  except Exception as e:
77
+ return None, f"Error processing file: {str(e)}"
78
+
79
 
80
+ # Build the Gradio UI.
81
  with gr.Blocks(theme=theme) as demo:
82
  title = gr.HTML("""
83
  <h1>Typhoon OCR</h1>
 
87
  <br />
88
  <details>
89
  <summary><strong>Disclaimer</strong></summary>
90
+ The responses generated by this Artificial Intelligence (AI) system are autonomously constructed and do not necessarily reflect the views or positions of the developing organizations, their affiliates, or any of their employees. These AI-generated responses do not represent those of the organizations. The organizations do not endorse, support, sanction, encourage, verify, or agree with the comments, opinions, or statements generated by this AI. The information produced by this AI is not intended to malign any religion, ethnic group, club, organization, company, individual, anyone, or anything. It is not the intent of the organizations to malign any group or individual. The AI operates based on its programming and training data and its responses should not be interpreted as the explicit intent or opinion of the organizations.
91
  </details>
92
  <br />
93
  <details>
 
99
  <summary><strong>License</strong></summary>
100
  This project utilizes certain datasets and checkpoints that are subject to their respective original licenses. Users must comply with all terms and conditions of these original licenses. The content of this project itself is licensed under the Apache license 2.0.
101
  </details>
102
+ """)
 
103
  with gr.Row():
104
  with gr.Column(scale=1):
105
+ # Accept PDF files as well as common image formats (PNG, JPG, JPEG).
106
  pdf_input = gr.File(label="📄 Upload Image file or PDF file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
107
 
 
108
  with gr.Group(elem_classes=["task-background"]):
109
  task_dropdown = gr.Radio(["default", "structure"], label="🎯 Select Task", value="default")
110
  gr.HTML("""
 
115
  demo.css = """
116
  .task-background {
117
  background: var(--block-background-fill) !important;
118
+
119
  }
120
  .task-background > * {
121
  background: var(--block-background-fill) !important;
 
125
  font-size: 12px;
126
  }
127
  """
128
+ page_number = gr.Number(label="📄 Page Number (for PDFs only)", value=1, minimum=1, step=1)
129
  run_button = gr.Button("🚀 Run")
 
 
130
  image_output = gr.Image(label="📸 Preview Image", type="pil")
 
131
  with gr.Column(scale=2):
 
132
  markdown_output = gr.Markdown(label='Markdown Result', show_label=True)
 
 
 
133
 
134
+
135
+ # Connect the UI inputs to the processing function.
136
  run_button.click(
137
  fn=process_pdf,
138
+ inputs=[pdf_input, task_dropdown, page_number],
139
+ outputs=[image_output, markdown_output]
140
  )
141
 
142
+ # Launch the Gradio demo locally (share=False, so no public share link is created)
 
 
 
143
  demo.launch(share=False)