Update app.py
Browse files
app.py
CHANGED
@@ -313,14 +313,66 @@ def fetch_inciweb_data():
|
|
313 |
|
314 |
# Enhanced coordinate extraction with multiple methods
|
315 |
def get_incident_coordinates_basic(incident_url):
|
316 |
-
"""Enhanced coordinate extraction with multiple methods"""
|
317 |
try:
|
318 |
print(f" Fetching coordinates from: {incident_url}")
|
319 |
response = requests.get(incident_url, timeout=20)
|
320 |
response.raise_for_status()
|
321 |
soup = BeautifulSoup(response.content, "html.parser")
|
322 |
|
323 |
-
# Method 1: Look for meta tags with coordinates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
meta_tags = soup.find_all("meta")
|
325 |
for meta in meta_tags:
|
326 |
if meta.get("name") == "geo.position":
|
@@ -333,28 +385,6 @@ def get_incident_coordinates_basic(incident_url):
|
|
333 |
except ValueError:
|
334 |
pass
|
335 |
|
336 |
-
# Method 2: Look for coordinate table rows
|
337 |
-
for row in soup.find_all('tr'):
|
338 |
-
th = row.find('th')
|
339 |
-
if th and 'Coordinates' in th.get_text(strip=True):
|
340 |
-
coord_cell = row.find('td')
|
341 |
-
if coord_cell:
|
342 |
-
coord_text = coord_cell.get_text(strip=True)
|
343 |
-
|
344 |
-
# Try to extract decimal coordinates
|
345 |
-
lat_match = re.search(r'(-?\d+\.?\d+)', coord_text)
|
346 |
-
if lat_match:
|
347 |
-
# Look for longitude after latitude
|
348 |
-
lon_match = re.search(r'(-?\d+\.?\d+)', coord_text[lat_match.end():])
|
349 |
-
if lon_match:
|
350 |
-
try:
|
351 |
-
lat = float(lat_match.group(1))
|
352 |
-
lon = float(lon_match.group(1))
|
353 |
-
print(f" Found coordinates via table: {lat}, {lon}")
|
354 |
-
return lat, lon
|
355 |
-
except ValueError:
|
356 |
-
pass
|
357 |
-
|
358 |
# Method 3: Look for script tags with map data
|
359 |
script_tags = soup.find_all("script")
|
360 |
for script in script_tags:
|
|
|
313 |
|
314 |
# Enhanced coordinate extraction with multiple methods
|
315 |
def get_incident_coordinates_basic(incident_url):
|
316 |
+
"""Enhanced coordinate extraction with proper DMS parsing"""
|
317 |
try:
|
318 |
print(f" Fetching coordinates from: {incident_url}")
|
319 |
response = requests.get(incident_url, timeout=20)
|
320 |
response.raise_for_status()
|
321 |
soup = BeautifulSoup(response.content, "html.parser")
|
322 |
|
323 |
+
# Method 1: Look for coordinate table rows with proper DMS parsing
|
324 |
+
for row in soup.find_all('tr'):
|
325 |
+
th = row.find('th')
|
326 |
+
if th and 'Coordinates' in th.get_text(strip=True):
|
327 |
+
coord_cell = row.find('td')
|
328 |
+
if coord_cell:
|
329 |
+
coord_content = coord_cell.get_text(strip=True)
|
330 |
+
print(f" Found coordinate cell content: {coord_content}")
|
331 |
+
|
332 |
+
# Parse latitude values (look for degrees, minutes, seconds)
|
333 |
+
lat_deg_match = re.search(r'(\d+)\s*°.*?Latitude', coord_content)
|
334 |
+
lat_min_match = re.search(r'(\d+)\s*\'.*?Latitude', coord_content)
|
335 |
+
lat_sec_match = re.search(r'(\d+\.?\d*)\s*\'\'.*?Latitude', coord_content)
|
336 |
+
|
337 |
+
# Parse longitude values (look for them after Latitude keyword)
|
338 |
+
longitude_part = coord_content[coord_content.find('Latitude'):] if 'Latitude' in coord_content else coord_content
|
339 |
+
lon_deg_match = re.search(r'[-]?\s*(\d+)\s*°', longitude_part)
|
340 |
+
lon_min_match = re.search(r'(\d+)\s*\'', longitude_part)
|
341 |
+
|
342 |
+
# Look for longitude seconds in div elements or text
|
343 |
+
lon_sec_div = coord_cell.find('div', class_=lambda c: c and 'margin-right' in c)
|
344 |
+
if lon_sec_div:
|
345 |
+
lon_sec_value = lon_sec_div.get_text(strip=True)
|
346 |
+
lon_sec_match = re.search(r'(\d+\.?\d*)', lon_sec_value)
|
347 |
+
print(f" Found longitude seconds in div: {lon_sec_value}")
|
348 |
+
else:
|
349 |
+
lon_sec_match = re.search(r'(\d+\.?\d*)\s*\'\'', longitude_part)
|
350 |
+
|
351 |
+
print(f" Parsed components - lat_deg: {lat_deg_match.group(1) if lat_deg_match else None}, "
|
352 |
+
f"lat_min: {lat_min_match.group(1) if lat_min_match else None}, "
|
353 |
+
f"lat_sec: {lat_sec_match.group(1) if lat_sec_match else None}")
|
354 |
+
print(f" lon_deg: {lon_deg_match.group(1) if lon_deg_match else None}, "
|
355 |
+
f"lon_min: {lon_min_match.group(1) if lon_min_match else None}, "
|
356 |
+
f"lon_sec: {lon_sec_match.group(1) if lon_sec_match else None}")
|
357 |
+
|
358 |
+
# If all values are found, convert to decimal coordinates
|
359 |
+
if lat_deg_match and lat_min_match and lat_sec_match and lon_deg_match and lon_min_match and lon_sec_match:
|
360 |
+
lat_deg = float(lat_deg_match.group(1))
|
361 |
+
lat_min = float(lat_min_match.group(1))
|
362 |
+
lat_sec = float(lat_sec_match.group(1))
|
363 |
+
|
364 |
+
lon_deg = float(lon_deg_match.group(1))
|
365 |
+
lon_min = float(lon_min_match.group(1))
|
366 |
+
lon_sec = float(lon_sec_match.group(1))
|
367 |
+
|
368 |
+
# Convert DMS to decimal degrees
|
369 |
+
latitude = lat_deg + lat_min/60 + lat_sec/3600
|
370 |
+
longitude = -(lon_deg + lon_min/60 + lon_sec/3600) # Western hemisphere is negative
|
371 |
+
|
372 |
+
print(f" Converted DMS to decimal: {latitude}, {longitude}")
|
373 |
+
return latitude, longitude
|
374 |
+
|
375 |
+
# Method 2: Look for meta tags with coordinates
|
376 |
meta_tags = soup.find_all("meta")
|
377 |
for meta in meta_tags:
|
378 |
if meta.get("name") == "geo.position":
|
|
|
385 |
except ValueError:
|
386 |
pass
|
387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
# Method 3: Look for script tags with map data
|
389 |
script_tags = soup.find_all("script")
|
390 |
for script in script_tags:
|