Zoe911 committed on
Commit
c2c00e6
·
verified ·
1 Parent(s): 4875ce9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -21
app.py CHANGED
@@ -4,33 +4,33 @@ from PIL import Image
4
  import numpy as np
5
  import re
6
 
7
- # 初始化OCR模型
8
  ocr = PaddleOCR(use_angle_cls=True, lang="en")
9
 
10
  def ocr_recognition(image):
11
  try:
12
- # 确保使用PIL
13
  if isinstance(image, np.ndarray):
14
  image = Image.fromarray(image)
15
  if not isinstance(image, Image.Image):
16
  return "Invalid image format"
17
 
18
- # 转换为numpy数组
19
  image_np = np.array(image)
20
 
21
- # 执行OCR识别
22
  result = ocr.ocr(image_np, cls=True)
23
 
24
  if not result or len(result) == 0 or result[0] is None:
25
  return "No text recognized"
26
 
27
- # 提取文本
28
  texts = [line[1][0] for line in result[0] if line]
29
 
30
  if not texts:
31
  return "No text recognized"
32
 
33
- # 解析文本以提取结构化信息
34
  structured_info = parse_text_to_structure(texts)
35
  return structured_info
36
 
@@ -38,7 +38,7 @@ def ocr_recognition(image):
38
  return f"Error processing image: {str(e)}"
39
 
40
  def parse_text_to_structure(texts):
41
- # 初始化结构化字段
42
  structured_data = {
43
  "Event": "",
44
  "Location": "",
@@ -48,11 +48,11 @@ def parse_text_to_structure(texts):
48
  "Date": ""
49
  }
50
 
51
- # 合并所有文本
52
  full_text = " ".join(texts)
53
 
54
- # 尝试使用正则表达式匹配各个字段
55
- # 这些模式需要根据您的实际文档格式进行调整
56
  event_pattern = re.search(r"(?:Event|Title):\s*(.+?)(?=\s*(?:Location|Venue|Date|Start|End|Duration)|$)", full_text, re.I)
57
  if event_pattern:
58
  structured_data["Event"] = event_pattern.group(1).strip()
@@ -77,29 +77,29 @@ def parse_text_to_structure(texts):
77
  if duration_pattern:
78
  structured_data["Duration"] = duration_pattern.group(1).strip()
79
 
80
- # 如果没有找到明确的持续时间,但有开始和结束时间,可以尝试计算
81
  if structured_data["Duration"] == "" and structured_data["Start Time"] and structured_data["End Time"]:
82
- # 这里需要更复杂的时间计算逻辑,简单起见暂时略过
83
  pass
84
 
85
- # 格式化输出
86
  output = "\n".join([f"{key}: {value}" for key, value in structured_data.items() if value])
87
 
88
- # 如果没有提取到任何结构化信息,返回原始文本
89
  if not output:
90
- return "未能提取结构化信息。原始文本:\n" + "\n".join(texts)
91
 
92
  return output
93
 
94
- # 创建Gradio界面
95
  interface = gr.Interface(
96
  fn=ocr_recognition,
97
- inputs=gr.Image(type="pil"), # 明确指定使用PIL
98
  outputs="text",
99
- title="结构化OCR识别",
100
- description="上传图片进行文本识别,并提取事件、地点、时间等结构化信息"
101
  )
102
 
103
- # 启动服务
104
  if __name__ == "__main__":
105
- interface.launch()
 
4
  import numpy as np
5
  import re
6
 
7
+ # Initialize OCR model
8
  ocr = PaddleOCR(use_angle_cls=True, lang="en")
9
 
10
  def ocr_recognition(image):
11
  try:
12
+ # Ensure using PIL
13
  if isinstance(image, np.ndarray):
14
  image = Image.fromarray(image)
15
  if not isinstance(image, Image.Image):
16
  return "Invalid image format"
17
 
18
+ # Convert to numpy array
19
  image_np = np.array(image)
20
 
21
+ # Perform OCR recognition
22
  result = ocr.ocr(image_np, cls=True)
23
 
24
  if not result or len(result) == 0 or result[0] is None:
25
  return "No text recognized"
26
 
27
+ # Extract text
28
  texts = [line[1][0] for line in result[0] if line]
29
 
30
  if not texts:
31
  return "No text recognized"
32
 
33
+ # Parse text to extract structured information
34
  structured_info = parse_text_to_structure(texts)
35
  return structured_info
36
 
 
38
  return f"Error processing image: {str(e)}"
39
 
40
  def parse_text_to_structure(texts):
41
+ # Initialize structured fields
42
  structured_data = {
43
  "Event": "",
44
  "Location": "",
 
48
  "Date": ""
49
  }
50
 
51
+ # Merge all text
52
  full_text = " ".join(texts)
53
 
54
+ # Try to match each field using regex
55
+ # These patterns should be adjusted according to your actual document format
56
  event_pattern = re.search(r"(?:Event|Title):\s*(.+?)(?=\s*(?:Location|Venue|Date|Start|End|Duration)|$)", full_text, re.I)
57
  if event_pattern:
58
  structured_data["Event"] = event_pattern.group(1).strip()
 
77
  if duration_pattern:
78
  structured_data["Duration"] = duration_pattern.group(1).strip()
79
 
80
+ # If no explicit duration is found but we have start and end times, we could calculate it
81
  if structured_data["Duration"] == "" and structured_data["Start Time"] and structured_data["End Time"]:
82
+ # More complex time calculation logic would be needed here
83
  pass
84
 
85
+ # Format output
86
  output = "\n".join([f"{key}: {value}" for key, value in structured_data.items() if value])
87
 
88
+ # If no structured information was extracted, return the original text
89
  if not output:
90
+ return "\n".join(texts)
91
 
92
  return output
93
 
94
+ # Create Gradio interface
95
  interface = gr.Interface(
96
  fn=ocr_recognition,
97
+ inputs=gr.Image(type="pil"), # Explicitly specify using PIL
98
  outputs="text",
99
+ title="Structured OCR Recognition",
100
+ description="Upload an image for text recognition and extraction of structured information"
101
  )
102
 
103
+ # Launch the service
104
  if __name__ == "__main__":
105
+ interface.launch()