# Install required packages if missing
import subprocess
import sys
def install_package(package, import_name=None):
    """Install *package* with pip unless it can already be imported.

    Args:
        package: Distribution name, exactly as passed to ``pip install``.
        import_name: Module name used to probe for the package. When omitted,
            a small alias table covers distributions whose import name differs
            from their pip name (``beautifulsoup4`` -> ``bs4``); any other
            package is probed under its own name.

    Raises:
        subprocess.CalledProcessError: If the pip invocation exits non-zero.
    """
    # Probing with the pip name would report packages such as beautifulsoup4
    # as missing on every run and reinstall them each time the script starts.
    import_aliases = {'beautifulsoup4': 'bs4'}
    module_name = import_name or import_aliases.get(package, package)
    try:
        __import__(module_name)
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Install required packages
required_packages = [
    'gradio',
    'pandas',
    'requests',
    'beautifulsoup4',
    'plotly',
    'folium',
    'numpy',
    'geopy',
]
# Hand each dependency to install_package in turn, before the imports below run.
for package in required_packages:
    install_package(package)
# Now import everything
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import folium
from folium.plugins import MarkerCluster, HeatMap
import re
import numpy as np
from urllib.parse import urljoin
import time
import json
import os
from geopy.distance import geodesic
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')
# Function to convert degrees, minutes, seconds to decimal degrees
def dms_to_decimal(degrees, minutes, seconds, direction):
    """Convert degrees/minutes/seconds to signed decimal degrees.

    Args:
        degrees: Whole degrees (number or numeric string); may be negative.
        minutes: Arc minutes (number or numeric string).
        seconds: Arc seconds (number or numeric string).
        direction: Hemisphere marker. ``'S'``, ``'W'`` or ``'-'`` (any case,
            surrounding whitespace ignored) yields a negative result; anything
            else leaves the sign to ``degrees``.

    Returns:
        The coordinate as a float, negative for south/west or for a negative
        ``degrees`` value.

    Raises:
        ValueError: If a component cannot be converted to ``float``.
    """
    import math

    deg = float(degrees)
    # Minutes and seconds always extend the magnitude away from zero, so they
    # are added to |degrees| and the sign is applied once at the end.
    magnitude = abs(deg) + float(minutes) / 60 + float(seconds) / 3600
    hemisphere = direction.strip().upper() if isinstance(direction, str) else direction
    # copysign also recognises "-0", whose sign a plain `< 0` test would lose.
    is_negative = hemisphere in ('S', 'W', '-') or math.copysign(1.0, deg) < 0
    return -magnitude if is_negative else magnitude
# Function to parse DMS coordinates from text
def parse_dms_coordinates(text):
    """Extract a latitude/longitude pair written in DMS notation from *text*.

    Recognised shapes look like ``45° 40' 51'' Latitude`` or
    ``121° 16' 37" W``: degrees, minutes and seconds followed by either the
    word ``Latitude``/``Longitude`` or a hemisphere (``N``/``S``/``E``/``W``
    or the spelled-out word). A leading ``-`` on the degrees also marks the
    southern/western hemisphere.

    Args:
        text: Free text that may contain DMS coordinates; ``None`` or an empty
            string is accepted.

    Returns:
        ``(latitude, longitude)`` in decimal degrees. Each element is ``None``
        when the corresponding coordinate was not found.
    """
    if not text:
        return None, None

    # Strip markdown bold markers and flatten line breaks before matching.
    text = text.replace('**', '').replace('\n', ' ').strip()

    # The seconds mark may be two apostrophes, one apostrophe or a double quote.
    seconds_mark = r"(?:''|['\"])"
    dms_core = r"(-?\d+)°\s*(\d+)'\s*(\d+\.?\d*)" + seconds_mark + r"\s*"
    # The hemisphere is captured from the coordinate's own suffix. Searching
    # the whole text for 'S'/'W' would flip the sign whenever an unrelated
    # capital letter appears anywhere nearby. The lookahead stops a single
    # letter from matching the start of an arbitrary following word.
    lat_pattern = dms_core + r"(Latitude|North|South|[NS])(?![A-Za-z])"
    lon_pattern = dms_core + r"(Longitude|East|West|[EW])(?![A-Za-z])"

    latitude = None
    longitude = None

    lat_match = re.search(lat_pattern, text)
    if lat_match:
        lat_deg, lat_min, lat_sec, lat_ref = lat_match.groups()
        is_south = lat_ref in ('S', 'South') or lat_deg.startswith('-')
        latitude = dms_to_decimal(
            lat_deg.lstrip('-'), lat_min, lat_sec, 'S' if is_south else 'N'
        )

    lon_match = re.search(lon_pattern, text)
    if lon_match:
        lon_deg, lon_min, lon_sec, lon_ref = lon_match.groups()
        is_west = lon_ref in ('W', 'West') or lon_deg.startswith('-')
        longitude = dms_to_decimal(
            lon_deg.lstrip('-'), lon_min, lon_sec, 'W' if is_west else 'E'
        )

    return latitude, longitude
# Function to fetch NASA FIRMS data
def fetch_firms_data():
    """Download the NASA FIRMS VIIRS 24-hour global CSV and keep USA hotspots.

    Rows are kept when they fall inside one of three approximate bounding
    boxes (continental US, Alaska, Hawaii). A ``datetime`` column is built
    from ``acq_date`` and ``acq_time`` and the result is sorted newest first.

    Returns:
        A DataFrame of USA hotspots, or an empty DataFrame if the download or
        parsing fails (the error is printed, not raised).
    """
    from io import StringIO

    firms_url = "https://firms.modaps.eosdis.nasa.gov/data/active_fire/viirs/csv/J1_VIIRS_C2_Global_24h.csv"
    # (lat_min, lat_max, lon_min, lon_max) - deliberately coarse rectangles.
    usa_boxes = (
        (24.5, 49.0, -125.0, -66.0),   # Continental US
        (54.0, 72.0, -180.0, -130.0),  # Alaska
        (18.0, 23.0, -162.0, -154.0),  # Hawaii
    )
    try:
        print("Fetching NASA FIRMS data...")
        response = requests.get(firms_url, timeout=60)
        response.raise_for_status()

        firms_df = pd.read_csv(StringIO(response.text))
        print(f"Retrieved {len(firms_df)} global fire hotspots")

        lat = firms_df['latitude']
        lon = firms_df['longitude']
        in_usa = pd.Series(False, index=firms_df.index, dtype=bool)
        for lat_min, lat_max, lon_min, lon_max in usa_boxes:
            # Series.between is inclusive on both ends by default.
            in_usa |= lat.between(lat_min, lat_max) & lon.between(lon_min, lon_max)
        usa_firms = firms_df[in_usa].copy()
        print(f"Filtered to {len(usa_firms)} USA fire hotspots")

        # acq_time is HHMM; zero-pad it so e.g. 45 becomes "0045".
        stamps = (usa_firms['acq_date'].astype(str) + ' ' +
                  usa_firms['acq_time'].astype(str).str.zfill(4))
        # Coerce instead of raising: one malformed row should become NaT
        # rather than send the whole download to the except branch below.
        usa_firms['datetime'] = pd.to_datetime(
            stamps, format='%Y-%m-%d %H%M', errors='coerce'
        )

        # Most recent acquisitions first; NaT rows sort to the end.
        usa_firms = usa_firms.sort_values('datetime', ascending=False)
        return usa_firms
    except Exception as e:
        print(f"Error fetching FIRMS data: {e}")
        return pd.DataFrame()
# Function to match FIRMS hotspots with InciWeb incidents
def _classify_activity(num_hotspots, total_frp):
    """Grade fire activity from a hotspot count and summed fire radiative power."""
    if num_hotspots >= 20 and total_frp >= 100:
        return 'Very High'
    if num_hotspots >= 10 and total_frp >= 50:
        return 'High'
    if num_hotspots >= 5 and total_frp >= 20:
        return 'Medium'
    if num_hotspots >= 1:
        return 'Low'
    return 'Minimal'


def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
    """Attach nearby FIRMS hotspot statistics to each InciWeb incident.

    Args:
        inciweb_df: Incident DataFrame; rows need ``latitude``/``longitude``
            to be matched.
        firms_df: FIRMS hotspot DataFrame with ``latitude``/``longitude`` and,
            optionally, ``frp``, ``confidence`` and ``datetime`` columns.
        max_distance_km: Maximum geodesic distance for a hotspot to count as
            belonging to an incident.

    Returns:
        A copy of ``inciweb_df`` with the columns ``firms_hotspots``,
        ``total_frp``, ``avg_confidence``, ``latest_hotspot``, ``is_active``,
        ``hotspot_coords`` and ``activity_level``. The columns are present,
        holding their defaults, even when nothing could be matched.
    """
    # Add the enrichment columns before any early return so callers always
    # receive the same schema.
    inciweb_df = inciweb_df.copy()
    inciweb_df['firms_hotspots'] = 0
    inciweb_df['total_frp'] = 0.0  # Fire Radiative Power
    inciweb_df['avg_confidence'] = 0.0
    inciweb_df['latest_hotspot'] = None
    inciweb_df['is_active'] = False
    inciweb_df['hotspot_coords'] = None
    inciweb_df['activity_level'] = 'Unknown'

    has_coord_columns = {'latitude', 'longitude'}.issubset(inciweb_df.columns)
    if firms_df.empty or inciweb_df.empty or not has_coord_columns:
        return inciweb_df

    print(f"Matching {len(firms_df)} FIRMS hotspots to {len(inciweb_df)} InciWeb incidents...")

    # Only process incidents that have coordinates
    incidents_with_coords = inciweb_df[
        inciweb_df['latitude'].notna() & inciweb_df['longitude'].notna()
    ]
    print(f"Processing {len(incidents_with_coords)} incidents with coordinates...")

    firms_lat = pd.to_numeric(firms_df['latitude'], errors='coerce').to_numpy(dtype=float)
    firms_lon = pd.to_numeric(firms_df['longitude'], errors='coerce').to_numpy(dtype=float)
    # Cheap pre-filter: two points are never closer than the meridian arc
    # between their latitudes. 110 km per degree is slightly below the true
    # minimum (about 110.57 km), so this window cannot drop a real match.
    lat_window = max_distance_km / 110.0

    for idx, incident in incidents_with_coords.iterrows():
        try:
            incident_coords = (float(incident['latitude']), float(incident['longitude']))
        except (TypeError, ValueError):
            continue  # Non-numeric coordinates cannot be matched

        candidates = np.flatnonzero(np.abs(firms_lat - incident_coords[0]) <= lat_window)
        matched_pos = []
        for pos in candidates:
            try:
                distance = geodesic(incident_coords, (firms_lat[pos], firms_lon[pos])).kilometers
            except Exception:
                continue  # Skip invalid coordinates
            if distance <= max_distance_km:
                matched_pos.append(pos)

        if not matched_pos:
            continue

        # Positional selection keeps the original column dtypes.
        matched_df = firms_df.iloc[matched_pos]
        num_hotspots = len(matched_df)

        total_frp = 0
        if 'frp' in matched_df.columns:
            total_frp = pd.to_numeric(matched_df['frp'], errors='coerce').sum()

        avg_confidence = 0
        if 'confidence' in matched_df.columns:
            # NOTE(review): VIIRS products publish confidence as categories
            # rather than percentages - confirm against the feed. Non-numeric
            # values are ignored here instead of raising inside mean().
            confidence = pd.to_numeric(matched_df['confidence'], errors='coerce')
            if confidence.notna().any():
                avg_confidence = confidence.mean()

        latest_hotspot = matched_df['datetime'].max() if 'datetime' in matched_df.columns else None
        activity_level = _classify_activity(num_hotspots, total_frp)

        # Update the incident data
        inciweb_df.at[idx, 'firms_hotspots'] = num_hotspots
        inciweb_df.at[idx, 'total_frp'] = total_frp
        inciweb_df.at[idx, 'avg_confidence'] = avg_confidence
        inciweb_df.at[idx, 'latest_hotspot'] = latest_hotspot
        inciweb_df.at[idx, 'is_active'] = True
        inciweb_df.at[idx, 'activity_level'] = activity_level

        # Store (lat, lon, frp) triples for visualization; frp defaults to 1.
        if 'frp' in matched_df.columns:
            frp_values = matched_df['frp'].tolist()
        else:
            frp_values = [1] * num_hotspots
        hotspot_coords = list(zip(
            matched_df['latitude'].tolist(),
            matched_df['longitude'].tolist(),
            frp_values,
        ))
        inciweb_df.at[idx, 'hotspot_coords'] = hotspot_coords

        print(f" {incident['name']}: {num_hotspots} hotspots, {total_frp:.1f} FRP, {activity_level} activity")

    # Incidents left with is_active == False had no hotspot within range.
    active_count = int(inciweb_df['is_active'].sum())
    total_with_coords = len(incidents_with_coords)
    print(f"Found {active_count} active incidents out of {total_with_coords} with coordinates")
    return inciweb_df
# Function to scrape InciWeb data from the accessible view page
def _parse_incident_cells(row_cells):
    """Read type, location, state, size and updated text from one table row."""
    details = {"type": row_cells[1].text.strip()}

    location = row_cells[2].text.strip()
    details["location"] = location
    # Prefer a standalone two-letter code; the word boundaries keep the match
    # from landing inside a longer uppercase word.
    state_match = re.search(r'\b([A-Z]{2})\b', location)
    if state_match:
        details["state"] = state_match.group(1)
    else:
        # Otherwise fall back to the text before the first comma.
        details["state"] = location.split(',')[0].strip()

    # First run of digits (with thousands separators) in the size cell.
    size_match = re.search(r'(\d+(?:,\d+)*)', row_cells[3].text.strip())
    details["size"] = int(size_match.group(1).replace(',', '')) if size_match else None

    details["updated"] = row_cells[4].text.strip()
    return details


def fetch_inciweb_data():
    """Scrape the InciWeb accessible-view page into a DataFrame of incidents.

    Every anchor whose href contains ``/incident-information/`` becomes one
    row with ``name``, ``link`` and ``id``. When the anchor sits in a table
    cell whose row has at least five cells, ``type``, ``location``, ``state``,
    ``size`` and ``updated`` are read from cells 1-4 of that row.

    Returns:
        A DataFrame with one row per incident link. If the page cannot be
        fetched, an empty DataFrame with no columns is returned and the error
        is printed.
    """
    base_url = "https://inciweb.wildfire.gov"
    accessible_url = urljoin(base_url, "/accessible-view")
    try:
        print(f"Fetching data from: {accessible_url}")
        response = requests.get(accessible_url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from InciWeb: {e}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.content, "html.parser")
    incidents = []
    for link in soup.find_all("a", href=re.compile(r"/incident-information/")):
        try:
            href = link.get("href")
            incident = {
                "name": link.text.strip(),
                "link": urljoin(base_url, href),
                # Ignore a trailing slash so the id is the last real segment.
                "id": href.rstrip("/").split("/")[-1],
            }
            cell = link.parent
            if cell is not None and cell.name == "td":
                row_cells = cell.parent.find_all("td")
                if len(row_cells) >= 5:
                    incident.update(_parse_incident_cells(row_cells))
            incidents.append(incident)
        except Exception as e:
            # One malformed row must not abort the whole scrape.
            print(f"Error processing incident: {e}")
            continue

    df = pd.DataFrame(incidents)
    # Ensure all expected columns exist, including when no links were found.
    for col in ["name", "link", "id", "size", "type", "location", "state", "updated"]:
        if col not in df.columns:
            df[col] = None
    df["size"] = pd.to_numeric(df["size"], errors="coerce")
    print(f"Fetched {len(df)} incidents")
    return df
# Enhanced coordinate extraction with multiple methods
def _coords_from_meta(soup):
    """Return (lat, lon, label) from a ``geo.position`` meta tag, else None."""
    for meta in soup.find_all("meta"):
        if meta.get("name") != "geo.position":
            continue
        parts = meta.get("content", "").split(";")
        if len(parts) < 2:
            continue
        try:
            return float(parts[0].strip()), float(parts[1].strip()), "meta tags"
        except ValueError:
            continue
    return None


def _coords_from_table(soup):
    """Return (lat, lon, label) from a 'Coordinates' table row, else None."""
    for row in soup.find_all('tr'):
        th = row.find('th')
        if th is None or 'Coordinates' not in th.get_text(strip=True):
            continue
        coord_cell = row.find('td')
        if coord_cell is None:
            continue
        coord_text = coord_cell.get_text(strip=True)

        # Try degrees/minutes/seconds first: a plain number regex would take
        # the degrees as latitude and the minutes as longitude.
        lat, lon = parse_dms_coordinates(coord_text)
        if lat is not None and lon is not None:
            return lat, lon, "table"
        if '°' in coord_text:
            continue  # DMS-looking text that did not parse; do not guess

        # Decimal fallback: the first two numbers are latitude then longitude.
        numbers = re.findall(r'-?\d+(?:\.\d+)?', coord_text)
        if len(numbers) >= 2:
            return float(numbers[0]), float(numbers[1]), "table"
    return None


def _coords_from_scripts(soup):
    """Return (lat, lon, label) found in inline script text, else None."""
    for script in soup.find_all("script"):
        script_text = script.string
        if not script_text:
            continue
        # Look for map initialization patterns
        if "L.map" in script_text or "leaflet" in script_text.lower():
            setview_match = re.search(
                r'setView\s*\(\s*\[\s*(-?\d+\.?\d*)\s*,\s*(-?\d+\.?\d*)\s*\]',
                script_text, re.IGNORECASE)
            if setview_match:
                return (float(setview_match.group(1)),
                        float(setview_match.group(2)), "map script")
        # Look for direct coordinate assignments
        lat_match = re.search(r'(?:lat|latitude)\s*[=:]\s*(-?\d+\.?\d*)',
                              script_text, re.IGNORECASE)
        lon_match = re.search(r'(?:lon|lng|longitude)\s*[=:]\s*(-?\d+\.?\d*)',
                              script_text, re.IGNORECASE)
        if lat_match and lon_match:
            return (float(lat_match.group(1)),
                    float(lon_match.group(1)), "script variables")
    return None


def get_incident_coordinates_basic(incident_url):
    """Fetch an incident page and extract its coordinates.

    Methods are tried in order: ``geo.position`` meta tag, a table row whose
    header contains 'Coordinates' (DMS or decimal), inline script text, and
    finally the hard-coded table in ``get_known_incident_coordinates``.

    Args:
        incident_url: Absolute URL of the incident page.

    Returns:
        ``(latitude, longitude)``; ``(None, None)`` when nothing is found or
        when fetching/parsing raises (the error is printed, not raised).
    """
    try:
        print(f" Fetching coordinates from: {incident_url}")
        response = requests.get(incident_url, timeout=20)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        for extractor in (_coords_from_meta, _coords_from_table, _coords_from_scripts):
            found = extractor(soup)
            if found:
                lat, lon, label = found
                print(f" Found coordinates via {label}: {lat}, {lon}")
                return lat, lon

        # Last resort: predetermined coordinates for known incidents
        known_coords = get_known_incident_coordinates(incident_url)
        if known_coords:
            print(f" Using known coordinates: {known_coords}")
            return known_coords

        print(f" No coordinates found for {incident_url}")
        return None, None
    except Exception as e:
        print(f" Error extracting coordinates from {incident_url}: {e}")
        return None, None
def get_known_incident_coordinates(incident_url):
    """Return approximate fallback coordinates for a few known incidents.

    The last path segment of the URL is lower-cased and checked for any of
    the known incident slugs as a substring.

    Args:
        incident_url: Incident page URL; ``None`` or ``""`` is accepted.

    Returns:
        A ``(latitude, longitude)`` tuple for the first matching slug, or
        ``None`` when the URL is empty or matches nothing.
    """
    if not incident_url:
        return None

    # Drop any query/fragment and trailing slash first; otherwise the "last
    # segment" of ".../horse-fire/" is an empty string and nothing matches.
    path = incident_url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
    incident_id = path.rsplit('/', 1)[-1].lower()

    # Some predetermined coordinates for major fire-prone areas.
    # These are approximate coordinates for demonstration.
    known_locations = {
        'horse-fire': (42.0, -104.0),  # Wyoming
        'aggie-creek-fire': (64.0, -153.0),  # Alaska
        'big-creek-fire': (47.0, -114.0),  # Montana
        'conner-fire': (39.5, -116.0),  # Nevada
        'trout-fire': (35.0, -106.0),  # New Mexico
        'basin-fire': (34.0, -112.0),  # Arizona
        'rowena-fire': (45.0, -121.0),  # Oregon
        'post-fire': (44.0, -115.0),  # Idaho
    }
    for key, coords in known_locations.items():
        if key in incident_id:
            return coords
    return None
# Function to get coordinates for a subset of incidents (for demo efficiency)
def add_coordinates_to_incidents(df, max_incidents=30):
    """Look up coordinates for a prioritized sample of incidents.

    Up to half of the sample is reserved for wildfires whose ``updated`` text
    looks recent; the remaining slots go to the other incidents in their
    existing order. Each sampled incident's page is fetched through
    ``get_incident_coordinates_basic`` and the result is stored only if it
    falls inside a coarse USA bounding box.

    Args:
        df: Incident DataFrame. ``type``, ``updated``, ``link`` and ``name``
            are used when present; a frame without them is tolerated.
        max_incidents: Maximum number of incident pages to fetch. Values
            below zero are treated as zero.

    Returns:
        A copy of ``df`` with ``latitude`` and ``longitude`` columns, ``None``
        where no valid coordinate was found.
    """
    df = df.copy()
    df['latitude'] = None
    df['longitude'] = None

    def text_matches(column, pattern):
        # A failed scrape yields a frame with no columns at all; treat a
        # missing column as "no row matches" instead of raising KeyError.
        if column not in df.columns:
            return pd.Series(False, index=df.index, dtype=bool)
        return df[column].str.contains(pattern, na=False)

    # head() with a negative count would return almost every row.
    limit = max(int(max_incidents), 0)

    # Prioritize recent wildfires, then other incidents
    recent_wildfires = df[
        text_matches('type', 'Wildfire') &
        text_matches('updated', 'ago|seconds|minutes|hours')
    ].head(limit // 2)
    # Let the rest fill every slot the wildfires did not use, so the sample
    # reaches max_incidents whenever enough incidents exist.
    other_incidents = df[
        ~df.index.isin(recent_wildfires.index)
    ].head(limit - len(recent_wildfires))
    sample_df = pd.concat([recent_wildfires, other_incidents]).head(limit)
    print(f"Getting coordinates for {len(sample_df)} incidents (prioritizing recent wildfires)...")

    success_count = 0
    for idx, row in sample_df.iterrows():
        if not pd.notna(row.get("link")):
            continue
        try:
            lat, lon = get_incident_coordinates_basic(row["link"])
            if lat is None or lon is None:
                print(f" ⚠️ {row['name']}: No coordinates found")
            elif 18.0 <= lat <= 72.0 and -180.0 <= lon <= -65.0:
                # Inside the USA bounds, including Alaska and Hawaii
                df.at[idx, 'latitude'] = lat
                df.at[idx, 'longitude'] = lon
                success_count += 1
                print(f" ✅ {row['name']}: {lat:.4f}, {lon:.4f}")
            else:
                print(f" ❌ {row['name']}: Invalid coordinates {lat}, {lon}")
            # Small delay to avoid overwhelming the server
            time.sleep(0.3)
        except Exception as e:
            print(f" ❌ Error getting coordinates for {row['name']}: {e}")
            continue

    print(f"Successfully extracted coordinates for {success_count}/{len(sample_df)} incidents")
    return df
# Enhanced map generation with FIRMS data
def generate_enhanced_map(df, firms_df):
"""Generate map with both InciWeb incidents and FIRMS hotspots"""
# Create map centered on the US
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
# Add FIRMS heat map layer for all USA hotspots (even if no InciWeb coordinates)
if not firms_df.empty:
print(f"Adding {len(firms_df)} FIRMS hotspots to map...")
heat_data = [[row['latitude'], row['longitude'], min(row.get('frp', 1), 100)]
for _, row in firms_df.iterrows()]
if heat_data:
HeatMap(
heat_data,
name="Fire Intensity Heatmap (NASA FIRMS)",
radius=15,
blur=10,
max_zoom=1,
gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
).add_to(m)
# Add some sample FIRMS points as markers
sample_firms = firms_df.head(100) # Show top 100 hotspots as individual markers
for _, hotspot in sample_firms.iterrows():
folium.CircleMarker(
location=[hotspot['latitude'], hotspot['longitude']],
radius=2 + min(hotspot.get('frp', 1) / 10, 8),
popup=f"🔥 FIRMS Hotspot
FRP: {hotspot.get('frp', 'N/A')} MW
Confidence: {hotspot.get('confidence', 'N/A')}%
Time: {hotspot.get('acq_time', 'N/A')}",
color='red',
fillColor='orange',
fillOpacity=0.7,
weight=1
).add_to(m)
# Add incident markers if we have coordinates
incidents_with_coords = df[(df['latitude'].notna()) & (df['longitude'].notna())]
if not incidents_with_coords.empty:
print(f"Adding {len(incidents_with_coords)} InciWeb incidents with coordinates to map...")
# Add incident markers
incident_cluster = MarkerCluster(name="InciWeb Incidents").add_to(m)
# Track statistics
active_incidents = 0
inactive_incidents = 0
for _, row in incidents_with_coords.iterrows():
lat, lon = row['latitude'], row['longitude']
# Determine marker color based on activity and type
if row.get('is_active', False):
active_incidents += 1
activity_level = row.get('activity_level', 'Unknown')
if activity_level == 'Very High':
color = 'red'
icon = 'fire'
elif activity_level == 'High':
color = 'orange'
icon = 'fire'
elif activity_level == 'Medium':
color = 'yellow'
icon = 'fire'
else:
color = 'lightred'
icon = 'fire'
else:
inactive_incidents += 1
color = 'gray'
icon = 'pause'
# Create detailed popup
popup_content = f"""