nakas committed on
Commit
1fa2eff
·
verified ·
1 Parent(s): 6da1d69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -32
app.py CHANGED
@@ -85,7 +85,7 @@ def parse_dms_coordinates(text):
85
  def fetch_firms_data():
86
  """
87
  Fetch NASA FIRMS VIIRS active fire data for the last 24 hours
88
- Filters for USA only and returns relevant fire hotspot data
89
  """
90
  firms_url = "https://firms.modaps.eosdis.nasa.gov/data/active_fire/viirs/csv/J1_VIIRS_C2_Global_24h.csv"
91
 
@@ -118,12 +118,36 @@ def fetch_firms_data():
118
 
119
  print(f"Filtered to {len(usa_firms)} USA fire hotspots")
120
 
121
- # Add datetime column for easier processing
122
- usa_firms['datetime'] = pd.to_datetime(usa_firms['acq_date'] + ' ' + usa_firms['acq_time'].astype(str).str.zfill(4),
123
- format='%Y-%m-%d %H%M')
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- # Sort by acquisition time (most recent first)
126
- usa_firms = usa_firms.sort_values('datetime', ascending=False)
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  return usa_firms
129
 
@@ -135,7 +159,7 @@ def fetch_firms_data():
135
  def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
136
  """
137
  Match FIRMS hotspots to InciWeb incidents based on geographic proximity
138
- Enhanced with better error handling
139
  """
140
  if firms_df.empty or inciweb_df.empty:
141
  print("Warning: Empty dataframes passed to matching function")
@@ -163,31 +187,46 @@ def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
163
 
164
  for idx, incident in incidents_with_coords.iterrows():
165
  try:
166
- incident_coords = (incident['latitude'], incident['longitude'])
167
 
168
  # Find FIRMS hotspots within the specified distance
169
- hotspot_distances = []
170
  matched_hotspots = []
171
 
172
  for _, hotspot in firms_df.iterrows():
173
  try:
174
- hotspot_coords = (hotspot['latitude'], hotspot['longitude'])
 
 
 
175
  distance = geodesic(incident_coords, hotspot_coords).kilometers
176
 
177
  if distance <= max_distance_km:
178
- hotspot_distances.append(distance)
179
- matched_hotspots.append(hotspot)
180
- except Exception as e:
181
- continue # Skip invalid coordinates
 
 
 
 
 
 
 
 
 
 
182
 
183
  if matched_hotspots:
184
- matched_df = pd.DataFrame(matched_hotspots)
185
-
186
  # Calculate aggregated metrics safely
187
  num_hotspots = len(matched_hotspots)
188
- total_frp = float(matched_df['frp'].sum()) if 'frp' in matched_df.columns else 0.0
189
- avg_confidence = float(matched_df['confidence'].mean()) if 'confidence' in matched_df.columns else 0.0
190
- latest_hotspot = matched_df['datetime'].max() if 'datetime' in matched_df.columns else None
 
 
 
 
 
191
 
192
  # Determine activity level based on hotspot count and FRP
193
  if num_hotspots >= 20 and total_frp >= 100:
@@ -209,10 +248,10 @@ def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
209
  inciweb_df.at[idx, 'is_active'] = True
210
  inciweb_df.at[idx, 'activity_level'] = activity_level
211
 
212
- # Store hotspot coordinates for visualization (simplified)
213
- hotspot_coords = [(float(hs['latitude']), float(hs['longitude']), float(hs.get('frp', 1)))
214
- for hs in matched_hotspots[:10]] # Limit to 10 for performance
215
- inciweb_df.at[idx, 'hotspot_coords'] = str(hotspot_coords) # Store as string for safety
216
 
217
  print(f" {incident['name']}: {num_hotspots} hotspots, {total_frp:.1f} FRP, {activity_level} activity")
218
 
@@ -220,7 +259,7 @@ def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
220
  print(f" Error processing incident {incident.get('name', 'Unknown')}: {e}")
221
  continue
222
 
223
- # Mark incidents without recent hotspots as potentially inactive
224
  active_count = (inciweb_df['is_active'] == True).sum()
225
  total_with_coords = len(incidents_with_coords)
226
 
@@ -230,14 +269,20 @@ def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
230
 
231
  except Exception as e:
232
  print(f"Error in match_firms_to_inciweb: {e}")
233
- # Return original dataframe with safety columns
234
- inciweb_df['firms_hotspots'] = 0
235
- inciweb_df['total_frp'] = 0.0
236
- inciweb_df['avg_confidence'] = 0.0
237
- inciweb_df['latest_hotspot'] = None
238
- inciweb_df['is_active'] = False
239
- inciweb_df['hotspot_coords'] = None
240
- inciweb_df['activity_level'] = 'Unknown'
 
 
 
 
 
 
241
  return inciweb_df
242
 
243
  # Function to scrape InciWeb data from the accessible view page
 
85
  def fetch_firms_data():
86
  """
87
  Fetch NASA FIRMS VIIRS active fire data for the last 24 hours
88
+ Filters for USA only and returns relevant fire hotspot data with cleaned numeric fields
89
  """
90
  firms_url = "https://firms.modaps.eosdis.nasa.gov/data/active_fire/viirs/csv/J1_VIIRS_C2_Global_24h.csv"
91
 
 
118
 
119
  print(f"Filtered to {len(usa_firms)} USA fire hotspots")
120
 
121
+ # Clean numeric columns to handle "nominal" values
122
+ if 'frp' in usa_firms.columns:
123
+ # Clean FRP column
124
+ usa_firms['frp'] = usa_firms['frp'].astype(str).str.replace('nominal', '', regex=False)
125
+ usa_firms['frp'] = usa_firms['frp'].str.replace(r'[^\d\.]', '', regex=True) # Keep only digits and decimals
126
+ usa_firms['frp'] = usa_firms['frp'].replace('', '0') # Replace empty strings with 0
127
+ usa_firms['frp'] = pd.to_numeric(usa_firms['frp'], errors='coerce').fillna(0)
128
+ print(f"Cleaned FRP column, mean FRP: {usa_firms['frp'].mean():.2f}")
129
+
130
+ if 'confidence' in usa_firms.columns:
131
+ # Clean confidence column
132
+ usa_firms['confidence'] = usa_firms['confidence'].astype(str).str.replace('nominal', '', regex=False)
133
+ usa_firms['confidence'] = usa_firms['confidence'].str.replace(r'[^\d\.]', '', regex=True)
134
+ usa_firms['confidence'] = usa_firms['confidence'].replace('', '50') # Default confidence
135
+ usa_firms['confidence'] = pd.to_numeric(usa_firms['confidence'], errors='coerce').fillna(50)
136
+ print(f"Cleaned confidence column, mean confidence: {usa_firms['confidence'].mean():.2f}")
137
 
138
+ # Add datetime column for easier processing
139
+ if 'acq_date' in usa_firms.columns and 'acq_time' in usa_firms.columns:
140
+ try:
141
+ usa_firms['datetime'] = pd.to_datetime(
142
+ usa_firms['acq_date'] + ' ' + usa_firms['acq_time'].astype(str).str.zfill(4),
143
+ format='%Y-%m-%d %H%M',
144
+ errors='coerce'
145
+ )
146
+ # Sort by acquisition time (most recent first)
147
+ usa_firms = usa_firms.sort_values('datetime', ascending=False)
148
+ print(f"Added datetime column, latest detection: {usa_firms['datetime'].max()}")
149
+ except Exception as e:
150
+ print(f"Warning: Could not create datetime column: {e}")
151
 
152
  return usa_firms
153
 
 
159
  def match_firms_to_inciweb(inciweb_df, firms_df, max_distance_km=50):
160
  """
161
  Match FIRMS hotspots to InciWeb incidents based on geographic proximity
162
+ Enhanced with better error handling and data cleaning
163
  """
164
  if firms_df.empty or inciweb_df.empty:
165
  print("Warning: Empty dataframes passed to matching function")
 
187
 
188
  for idx, incident in incidents_with_coords.iterrows():
189
  try:
190
+ incident_coords = (float(incident['latitude']), float(incident['longitude']))
191
 
192
  # Find FIRMS hotspots within the specified distance
 
193
  matched_hotspots = []
194
 
195
  for _, hotspot in firms_df.iterrows():
196
  try:
197
+ hotspot_lat = float(hotspot['latitude'])
198
+ hotspot_lon = float(hotspot['longitude'])
199
+ hotspot_coords = (hotspot_lat, hotspot_lon)
200
+
201
  distance = geodesic(incident_coords, hotspot_coords).kilometers
202
 
203
  if distance <= max_distance_km:
204
+ # Create a clean hotspot record with safe conversions
205
+ clean_hotspot = {
206
+ 'latitude': hotspot_lat,
207
+ 'longitude': hotspot_lon,
208
+ 'frp': float(hotspot.get('frp', 0)) if pd.notna(hotspot.get('frp')) else 0.0,
209
+ 'confidence': float(hotspot.get('confidence', 50)) if pd.notna(hotspot.get('confidence')) else 50.0,
210
+ 'datetime': hotspot.get('datetime', None),
211
+ 'distance': distance
212
+ }
213
+ matched_hotspots.append(clean_hotspot)
214
+
215
+ except (ValueError, TypeError, KeyError) as e:
216
+ # Skip invalid hotspot data
217
+ continue
218
 
219
  if matched_hotspots:
 
 
220
  # Calculate aggregated metrics safely
221
  num_hotspots = len(matched_hotspots)
222
+ total_frp = sum(hs['frp'] for hs in matched_hotspots)
223
+ avg_confidence = sum(hs['confidence'] for hs in matched_hotspots) / num_hotspots if num_hotspots > 0 else 0.0
224
+
225
+ # Get latest hotspot time
226
+ latest_hotspot = None
227
+ hotspot_times = [hs['datetime'] for hs in matched_hotspots if hs['datetime'] is not None]
228
+ if hotspot_times:
229
+ latest_hotspot = max(hotspot_times)
230
 
231
  # Determine activity level based on hotspot count and FRP
232
  if num_hotspots >= 20 and total_frp >= 100:
 
248
  inciweb_df.at[idx, 'is_active'] = True
249
  inciweb_df.at[idx, 'activity_level'] = activity_level
250
 
251
+ # Store simplified hotspot coordinates for visualization
252
+ hotspot_coords_str = str([(hs['latitude'], hs['longitude'], hs['frp'])
253
+ for hs in matched_hotspots[:10]]) # Limit to 10 for performance
254
+ inciweb_df.at[idx, 'hotspot_coords'] = hotspot_coords_str
255
 
256
  print(f" {incident['name']}: {num_hotspots} hotspots, {total_frp:.1f} FRP, {activity_level} activity")
257
 
 
259
  print(f" Error processing incident {incident.get('name', 'Unknown')}: {e}")
260
  continue
261
 
262
+ # Calculate final statistics
263
  active_count = (inciweb_df['is_active'] == True).sum()
264
  total_with_coords = len(incidents_with_coords)
265
 
 
269
 
270
  except Exception as e:
271
  print(f"Error in match_firms_to_inciweb: {e}")
272
+ # Return original dataframe with safety columns if matching completely fails
273
+ inciweb_df = inciweb_df.copy()
274
+ for col in ['firms_hotspots', 'total_frp', 'avg_confidence', 'latest_hotspot', 'is_active', 'hotspot_coords', 'activity_level']:
275
+ if col not in inciweb_df.columns:
276
+ if col in ['firms_hotspots']:
277
+ inciweb_df[col] = 0
278
+ elif col in ['total_frp', 'avg_confidence']:
279
+ inciweb_df[col] = 0.0
280
+ elif col in ['is_active']:
281
+ inciweb_df[col] = False
282
+ elif col in ['activity_level']:
283
+ inciweb_df[col] = 'Unknown'
284
+ else:
285
+ inciweb_df[col] = None
286
  return inciweb_df
287
 
288
  # Function to scrape InciWeb data from the accessible view page