Zoe911 committed on
Commit
36e8407
·
verified ·
1 Parent(s): 3b8de2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -36
app.py CHANGED
@@ -23,13 +23,12 @@ nlp = load_spacy_model()
23
  # Helper function: Normalize date and time expressions
24
  def normalize_data_time(text):
25
  try:
26
- # Use dateutil to parse the time
27
  parsed_date = dateutil.parser.parse(text, fuzzy=True)
28
- return parsed_date # Return datetime object instead of string
29
  except (ValueError, TypeError):
30
- return None # Return None if parsing fails
31
 
32
- # Calculate duration between two times (in hours and minutes)
33
  def calculate_duration(start_time, end_time):
34
  if isinstance(start_time, datetime) and isinstance(end_time, datetime):
35
  delta = end_time - start_time
@@ -39,68 +38,58 @@ def calculate_duration(start_time, end_time):
39
  return f"{hours}h {minutes}m"
40
  return "Unknown"
41
 
42
- # Main function
43
  def process_text(text):
44
  doc = nlp(text)
45
 
46
- # Extract named entities
47
  entities = {
48
- "PERSON": [], # Names
49
- "DATE": [], # Dates
50
- "TIME": [], # Times
51
- "GPE": [], # Locations
52
- "EVENT": [] # Events
53
  }
54
 
55
- # Extract information from spaCy's named entity recognition
56
  for ent in doc.ents:
57
  if ent.label_ in entities:
58
  entities[ent.label_].append(ent.text)
59
- # Normalize dates
60
  if ent.label_ == "DATE":
61
  normalized = normalize_data_time(ent.text)
62
  entities[ent.label_][-1] = normalized.strftime("%Y-%m-%d") if normalized else ent.text
63
 
64
- # Infer the event (look for verbs or noun phrases)
65
  event = None
66
  for token in doc:
67
  if token.pos_ in ["VERB", "NOUN"] and token.dep_ in ["ROOT", "dobj", "nsubj"]:
68
- event = token.lemma_ # Use the lemmatized form
69
  break
70
  if event:
71
  entities["EVENT"].append(event)
72
 
73
- # Extract start time, end time, and calculate duration
74
  start_time = None
75
  end_time = None
76
  duration = "Unknown"
77
 
78
- # Look for patterns like "from 3PM to 4PM" or "3PM to 5PM"
79
- time_pattern = re.compile(r"(\d{1,2}(?::\d{2})?\s*(?:AM|PM|am|pm)?)\s*(?:to|until|-)\s*(\d{1,2}(?::\d{2})?\s*(?:AM|PM|am|pm)?)", re.IGNORECASE)
80
  match = time_pattern.search(text)
81
 
82
  if match:
83
- # Found a range like "3PM to 4PM"
84
  start_str, end_str = match.groups()
85
  start_time = normalize_data_time(start_str)
86
  end_time = normalize_data_time(end_str)
87
  else:
88
- # If no range, use the first and second TIME entities
89
- if len(entities["TIME"]) >= 1:
90
- start_time = normalize_data_time(entities["TIME"][0])
91
- if len(entities["TIME"]) >= 2:
92
- end_time = normalize_data_time(entities["TIME"][1])
93
-
94
- # Calculate duration if both start and end times are available
 
 
95
  if start_time and end_time:
96
  duration = calculate_duration(start_time, end_time)
97
- start_time_str = start_time.strftime("%H:%M")
98
- end_time_str = end_time.strftime("%H:%M")
99
- else:
100
- start_time_str = start_time.strftime("%H:%M") if start_time else "None"
101
- end_time_str = "None"
102
 
103
- # Construct structured output
104
  result = {
105
  "Event": entities["EVENT"][0] if entities["EVENT"] else "Unknown",
106
  "People": ", ".join(entities["PERSON"]) if entities["PERSON"] else "None",
@@ -111,7 +100,6 @@ def process_text(text):
111
  "Location": ", ".join(entities["GPE"]) if entities["GPE"] else "None"
112
  }
113
 
114
- # Format the output
115
  output = (
116
  f"Event: {result['Event']}\n"
117
  f"People: {result['People']}\n"
@@ -123,7 +111,7 @@ def process_text(text):
123
  )
124
  return output
125
 
126
- # Gradio interface (moved outside of process_text)
127
  demo = gr.Interface(
128
  fn=process_text,
129
  inputs="text",
@@ -132,8 +120,6 @@ demo = gr.Interface(
132
  description="Input text or OCR-extracted text to normalize and extract event with start time, end time, and duration"
133
  )
134
 
135
- # Launch the app
136
  if __name__ == "__main__":
137
  print("Launching Gradio application...")
138
  demo.launch(server_name="0.0.0.0", server_port=7860)
139
-
 
23
  # Helper function: Normalize date and time expressions
24
  def normalize_data_time(text):
25
  try:
 
26
  parsed_date = dateutil.parser.parse(text, fuzzy=True)
27
+ return parsed_date
28
  except (ValueError, TypeError):
29
+ return None
30
 
31
+ # Calculate duration between two times
32
  def calculate_duration(start_time, end_time):
33
  if isinstance(start_time, datetime) and isinstance(end_time, datetime):
34
  delta = end_time - start_time
 
38
  return f"{hours}h {minutes}m"
39
  return "Unknown"
40
 
41
+ # Main processing function
42
  def process_text(text):
43
  doc = nlp(text)
44
 
 
45
  entities = {
46
+ "PERSON": [],
47
+ "DATE": [],
48
+ "TIME": [],
49
+ "GPE": [],
50
+ "EVENT": []
51
  }
52
 
 
53
  for ent in doc.ents:
54
  if ent.label_ in entities:
55
  entities[ent.label_].append(ent.text)
 
56
  if ent.label_ == "DATE":
57
  normalized = normalize_data_time(ent.text)
58
  entities[ent.label_][-1] = normalized.strftime("%Y-%m-%d") if normalized else ent.text
59
 
 
60
  event = None
61
  for token in doc:
62
  if token.pos_ in ["VERB", "NOUN"] and token.dep_ in ["ROOT", "dobj", "nsubj"]:
63
+ event = token.lemma_
64
  break
65
  if event:
66
  entities["EVENT"].append(event)
67
 
 
68
  start_time = None
69
  end_time = None
70
  duration = "Unknown"
71
 
72
+ # Try to match range
73
+ time_pattern = re.compile(r"(\d{1,2}(?::\d{2})?\s*(?:AM|PM|am|pm)?)\s*(?:to|until|-|–)\s*(\d{1,2}(?::\d{2})?\s*(?:AM|PM|am|pm)?)", re.IGNORECASE)
74
  match = time_pattern.search(text)
75
 
76
  if match:
 
77
  start_str, end_str = match.groups()
78
  start_time = normalize_data_time(start_str)
79
  end_time = normalize_data_time(end_str)
80
  else:
81
+ # Fallback: extract multiple AM/PM time entries
82
+ ampm_matches = re.findall(r"\d{1,2}(?::\d{2})?\s*[AaPp][Mm]", text)
83
+ if len(ampm_matches) >= 1:
84
+ start_time = normalize_data_time(ampm_matches[0])
85
+ if len(ampm_matches) >= 2:
86
+ end_time = normalize_data_time(ampm_matches[1])
87
+
88
+ start_time_str = start_time.strftime("%H:%M") if start_time else "None"
89
+ end_time_str = end_time.strftime("%H:%M") if end_time else "None"
90
  if start_time and end_time:
91
  duration = calculate_duration(start_time, end_time)
 
 
 
 
 
92
 
 
93
  result = {
94
  "Event": entities["EVENT"][0] if entities["EVENT"] else "Unknown",
95
  "People": ", ".join(entities["PERSON"]) if entities["PERSON"] else "None",
 
100
  "Location": ", ".join(entities["GPE"]) if entities["GPE"] else "None"
101
  }
102
 
 
103
  output = (
104
  f"Event: {result['Event']}\n"
105
  f"People: {result['People']}\n"
 
111
  )
112
  return output
113
 
114
+ # Gradio interface
115
  demo = gr.Interface(
116
  fn=process_text,
117
  inputs="text",
 
120
  description="Input text or OCR-extracted text to normalize and extract event with start time, end time, and duration"
121
  )
122
 
 
123
  if __name__ == "__main__":
124
  print("Launching Gradio application...")
125
  demo.launch(server_name="0.0.0.0", server_port=7860)