Update app.py
Browse files
app.py
CHANGED
@@ -1,694 +1,241 @@
|
|
1 |
-
import gradio as gr
|
2 |
import requests
|
3 |
-
import folium
|
4 |
-
import json
|
5 |
-
import time
|
6 |
-
import os
|
7 |
-
from typing import Dict, List, Optional, Tuple
|
8 |
import pandas as pd
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
class
|
11 |
-
"""
|
12 |
|
13 |
def __init__(self):
|
14 |
-
self.base_url = "https://
|
15 |
-
self.aqi_colors = {
|
16 |
-
"Good": "#00E400",
|
17 |
-
"Moderate": "#FFFF00",
|
18 |
-
"Unhealthy for Sensitive Groups": "#FF7E00",
|
19 |
-
"Unhealthy": "#FF0000",
|
20 |
-
"Very Unhealthy": "#8F3F97",
|
21 |
-
"Hazardous": "#7E0023"
|
22 |
-
}
|
23 |
-
self.aqi_ranges = {
|
24 |
-
(0, 50): "Good",
|
25 |
-
(51, 100): "Moderate",
|
26 |
-
(101, 150): "Unhealthy for Sensitive Groups",
|
27 |
-
(151, 200): "Unhealthy",
|
28 |
-
(201, 300): "Very Unhealthy",
|
29 |
-
(301, 500): "Hazardous"
|
30 |
-
}
|
31 |
-
|
32 |
-
def get_aqi_category(self, aqi_value: int) -> str:
|
33 |
-
"""Get AQI category based on value"""
|
34 |
-
for (min_val, max_val), category in self.aqi_ranges.items():
|
35 |
-
if min_val <= aqi_value <= max_val:
|
36 |
-
return category
|
37 |
-
return "Unknown"
|
38 |
-
|
39 |
-
def get_aqi_color(self, category: str) -> str:
|
40 |
-
"""Get color for AQI category"""
|
41 |
-
return self.aqi_colors.get(category, "#808080")
|
42 |
-
|
43 |
-
def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
|
44 |
-
"""
|
45 |
-
DIRECT ACCESS: Get ALL monitoring stations from hourly data files
|
46 |
-
Returns: (data_list, status_message)
|
47 |
-
"""
|
48 |
-
if not api_key or api_key.strip() == "":
|
49 |
-
return [], "β Please enter a valid AirNow API key"
|
50 |
-
|
51 |
-
print(f"π― DIRECT FILE ACCESS: Grabbing hourly data files with ALL monitoring sites...")
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
# STRATEGY 1: Access hourly data files directly
|
57 |
-
print("π STRATEGY 1: Accessing bulk hourly data files...")
|
58 |
-
hourly_data = self.get_hourly_data_files(api_key)
|
59 |
-
all_data.extend(hourly_data)
|
60 |
-
print(f"Hourly files found: {len(hourly_data)} station records")
|
61 |
-
|
62 |
-
# STRATEGY 2: Try file products mentioned in research
|
63 |
-
print("π STRATEGY 2: Accessing file products and bulk endpoints...")
|
64 |
-
file_data = self.get_file_products(api_key)
|
65 |
-
all_data.extend(file_data)
|
66 |
-
print(f"File products found: {len(file_data)} additional records")
|
67 |
-
|
68 |
-
# STRATEGY 3: Try data dumps and bulk exports
|
69 |
-
print("π STRATEGY 3: Accessing data dumps and exports...")
|
70 |
-
export_data = self.get_bulk_exports(api_key)
|
71 |
-
all_data.extend(export_data)
|
72 |
-
print(f"Bulk exports found: {len(export_data)} additional records")
|
73 |
-
|
74 |
-
# STRATEGY 4: Parse any CSV/XML/JSON data files
|
75 |
-
print("π STRATEGY 4: Parsing structured data files...")
|
76 |
-
parsed_data = self.parse_structured_files(api_key)
|
77 |
-
all_data.extend(parsed_data)
|
78 |
-
print(f"Parsed files found: {len(parsed_data)} additional records")
|
79 |
-
|
80 |
-
print(f"π― Total raw data from files: {len(all_data)} records")
|
81 |
-
|
82 |
-
if not all_data:
|
83 |
-
return [], f"β οΈ No bulk data files accessible with this API key."
|
84 |
-
|
85 |
-
# Comprehensive deduplication
|
86 |
-
unique_data = self.comprehensive_deduplication(all_data)
|
87 |
-
|
88 |
-
print(f"π FINAL RESULT: {len(unique_data)} unique monitoring stations from bulk files")
|
89 |
-
|
90 |
-
return unique_data, f"π― BULK FILE ACCESS: Found {len(unique_data)} monitoring stations from hourly data files"
|
91 |
-
|
92 |
-
except Exception as e:
|
93 |
-
print(f"File access error: {str(e)}")
|
94 |
-
return [], f"β Error accessing bulk files: {str(e)}"
|
95 |
-
|
96 |
-
def get_hourly_data_files(self, api_key: str) -> List[Dict]:
|
97 |
-
"""Access hourly data files containing all monitoring sites"""
|
98 |
-
data = []
|
99 |
|
100 |
-
#
|
101 |
-
|
102 |
-
f"{self.base_url}/files/data/",
|
103 |
-
f"{self.base_url}/files/hourly/",
|
104 |
-
f"{self.base_url}/files/",
|
105 |
-
f"{self.base_url}/aq/data/hourly/",
|
106 |
-
f"{self.base_url}/aq/files/",
|
107 |
-
f"{self.base_url}/data/",
|
108 |
-
f"{self.base_url}/hourly/",
|
109 |
-
# Try specific file formats
|
110 |
-
f"{self.base_url}/files/HourlyData.dat",
|
111 |
-
f"{self.base_url}/files/MonitoringSites.dat",
|
112 |
-
f"{self.base_url}/files/reportingarea.dat",
|
113 |
-
f"{self.base_url}/files/HourlyData.csv",
|
114 |
-
f"{self.base_url}/files/stations.csv",
|
115 |
-
f"{self.base_url}/files/current.csv",
|
116 |
-
f"{self.base_url}/files/sites.xml",
|
117 |
-
f"{self.base_url}/files/data.json",
|
118 |
-
]
|
119 |
|
120 |
-
|
|
|
|
|
121 |
try:
|
122 |
-
|
|
|
123 |
|
124 |
-
# Try
|
125 |
-
|
126 |
-
|
127 |
-
timeout=30)
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
try:
|
134 |
-
json_data = response.json()
|
135 |
-
if isinstance(json_data, list):
|
136 |
-
for record in json_data:
|
137 |
-
record['source'] = 'hourly_file'
|
138 |
-
data.extend(json_data)
|
139 |
-
print(f"Parsed JSON: {len(json_data)} records")
|
140 |
-
continue
|
141 |
-
except:
|
142 |
-
pass
|
143 |
-
|
144 |
-
# Try to parse as CSV
|
145 |
-
try:
|
146 |
-
csv_data = self.parse_csv_response(response.text)
|
147 |
-
if csv_data:
|
148 |
-
data.extend(csv_data)
|
149 |
-
print(f"Parsed CSV: {len(csv_data)} records")
|
150 |
-
continue
|
151 |
-
except:
|
152 |
-
pass
|
153 |
|
154 |
-
#
|
155 |
-
|
156 |
-
|
157 |
-
if pipe_data:
|
158 |
-
data.extend(pipe_data)
|
159 |
-
print(f"Parsed pipe-delimited: {len(pipe_data)} records")
|
160 |
-
continue
|
161 |
-
except:
|
162 |
-
pass
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
|
168 |
-
|
|
|
|
|
|
|
169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
except Exception as e:
|
171 |
-
print(f"Error
|
172 |
continue
|
|
|
|
|
173 |
|
174 |
return data
|
175 |
|
176 |
-
def
|
177 |
-
"""
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
file_products = [
|
182 |
-
# From airnowtech.org - these often have bulk data
|
183 |
-
"https://files.airnowtech.org/airnow/today/HourlyData.dat",
|
184 |
-
"https://files.airnowtech.org/airnow/today/daily_summary.dat",
|
185 |
-
"https://files.airnowtech.org/airnow/today/reportingarea.dat",
|
186 |
-
"https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat",
|
187 |
-
"https://files.airnowtech.org/airnow/HourlyData.dat",
|
188 |
-
"https://files.airnowtech.org/HourlyData.dat",
|
189 |
-
"https://files.airnowtech.org/reportingarea.dat",
|
190 |
-
"https://files.airnowtech.org/airnow/today/HourlyAQObs.dat",
|
191 |
-
"https://files.airnowtech.org/airnow/today/HourlyAQForecast.dat",
|
192 |
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
f"{
|
199 |
-
f"{self.base_url}/files/monitors.dat",
|
200 |
-
]
|
201 |
-
|
202 |
-
for file_url in file_products:
|
203 |
-
try:
|
204 |
-
print(f"Trying file: {file_url}")
|
205 |
-
|
206 |
-
# Try with and without API key
|
207 |
-
for use_api_key in [True, False]:
|
208 |
-
try:
|
209 |
-
if use_api_key and "airnowapi.org" in file_url:
|
210 |
-
params = {"API_KEY": api_key}
|
211 |
-
else:
|
212 |
-
params = {}
|
213 |
-
|
214 |
-
response = requests.get(file_url, params=params, timeout=30)
|
215 |
-
|
216 |
-
if response.status_code == 200 and response.text.strip():
|
217 |
-
print(f"β
File found: {file_url}")
|
218 |
-
|
219 |
-
# Parse the file content
|
220 |
-
file_data = self.parse_airnow_file_format(response.text)
|
221 |
-
if file_data:
|
222 |
-
for record in file_data:
|
223 |
-
record['source'] = 'file_product'
|
224 |
-
data.extend(file_data)
|
225 |
-
print(f"Parsed file: {len(file_data)} records")
|
226 |
-
break # Success, move to next file
|
227 |
-
|
228 |
-
except Exception as e:
|
229 |
-
continue
|
230 |
-
|
231 |
-
time.sleep(0.1)
|
232 |
-
|
233 |
-
except Exception as e:
|
234 |
-
continue
|
235 |
|
236 |
-
return
|
237 |
|
238 |
-
def
|
239 |
-
"""
|
|
|
240 |
data = []
|
241 |
|
242 |
-
|
243 |
-
f"{self.base_url}/aq/data/",
|
244 |
-
f"{self.base_url}/aq/observation/",
|
245 |
-
f"{self.base_url}/aq/monitoring/",
|
246 |
-
f"{self.base_url}/export/",
|
247 |
-
f"{self.base_url}/bulk/",
|
248 |
-
f"{self.base_url}/download/",
|
249 |
-
f"{self.base_url}/api/data/",
|
250 |
-
]
|
251 |
|
252 |
-
for
|
|
|
|
|
|
|
253 |
try:
|
254 |
-
|
255 |
-
|
256 |
-
"API_KEY": api_key,
|
257 |
-
"datatype": "monitoring",
|
258 |
-
"export": "all"
|
259 |
-
}
|
260 |
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
return data
|
276 |
-
|
277 |
-
def parse_structured_files(self, api_key: str) -> List[Dict]:
|
278 |
-
"""Try to get structured data files in various formats"""
|
279 |
-
data = []
|
280 |
-
|
281 |
-
# Try current hour data endpoint (should have all active stations)
|
282 |
-
try:
|
283 |
-
from datetime import datetime
|
284 |
-
import pytz
|
285 |
-
|
286 |
-
# Get current hour in Eastern Time (AirNow's timezone)
|
287 |
-
eastern = pytz.timezone('US/Eastern')
|
288 |
-
now = datetime.now(eastern)
|
289 |
-
|
290 |
-
# Try current hour endpoint
|
291 |
-
hour_endpoints = [
|
292 |
-
f"{self.base_url}/aq/observation/zipCode/current/",
|
293 |
-
f"{self.base_url}/aq/data/monitoring/current/",
|
294 |
-
f"{self.base_url}/aq/observation/latLong/current/",
|
295 |
-
]
|
296 |
-
|
297 |
-
# Try to get data for entire country using bounding box
|
298 |
-
usa_bbox = {
|
299 |
-
"minLat": 18.0, # Southern tip of Hawaii
|
300 |
-
"maxLat": 72.0, # Northern Alaska
|
301 |
-
"minLon": -180.0, # Western Alaska
|
302 |
-
"maxLon": -65.0 # Eastern Maine
|
303 |
-
}
|
304 |
-
|
305 |
-
for endpoint in hour_endpoints:
|
306 |
-
try:
|
307 |
-
if "zipCode" in endpoint:
|
308 |
-
# Use major ZIP codes to get broad coverage
|
309 |
-
major_zips = ["10001", "90210", "60601", "77001", "33101", "85001", "98101"]
|
310 |
-
for zipcode in major_zips:
|
311 |
-
params = {
|
312 |
-
"format": "application/json",
|
313 |
-
"zipCode": zipcode,
|
314 |
-
"distance": 200, # Max distance
|
315 |
-
"API_KEY": api_key
|
316 |
-
}
|
317 |
-
response = requests.get(endpoint, params=params, timeout=15)
|
318 |
-
if response.status_code == 200:
|
319 |
-
zip_data = response.json()
|
320 |
-
if zip_data:
|
321 |
-
data.extend(zip_data)
|
322 |
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
"latitude": 39.8283,
|
328 |
-
"longitude": -98.5795,
|
329 |
-
"distance": 2000, # Very large radius
|
330 |
-
"API_KEY": api_key
|
331 |
-
}
|
332 |
-
response = requests.get(endpoint, params=params, timeout=15)
|
333 |
-
if response.status_code == 200:
|
334 |
-
center_data = response.json()
|
335 |
-
if center_data:
|
336 |
-
data.extend(center_data)
|
337 |
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
if response.status_code == 200:
|
347 |
-
bbox_data = response.json()
|
348 |
-
if bbox_data:
|
349 |
-
data.extend(bbox_data)
|
350 |
-
|
351 |
-
except Exception as e:
|
352 |
-
continue
|
353 |
-
|
354 |
-
except Exception as e:
|
355 |
-
print(f"Error in structured files: {str(e)}")
|
356 |
|
|
|
357 |
return data
|
358 |
|
359 |
-
def
|
360 |
-
"""Parse
|
361 |
-
import csv
|
362 |
-
from io import StringIO
|
363 |
-
|
364 |
-
try:
|
365 |
-
reader = csv.DictReader(StringIO(text))
|
366 |
-
return [row for row in reader]
|
367 |
-
except:
|
368 |
-
return []
|
369 |
-
|
370 |
-
def parse_pipe_delimited(self, text: str) -> List[Dict]:
|
371 |
-
"""Parse pipe-delimited format (common in AirNow files)"""
|
372 |
-
lines = text.strip().split('\n')
|
373 |
-
if len(lines) < 2:
|
374 |
-
return []
|
375 |
-
|
376 |
-
try:
|
377 |
-
# First line might be headers
|
378 |
-
headers = lines[0].split('|')
|
379 |
-
data = []
|
380 |
-
|
381 |
-
for line in lines[1:]:
|
382 |
-
values = line.split('|')
|
383 |
-
if len(values) == len(headers):
|
384 |
-
record = dict(zip(headers, values))
|
385 |
-
data.append(record)
|
386 |
-
|
387 |
-
return data
|
388 |
-
except:
|
389 |
-
return []
|
390 |
-
|
391 |
-
def parse_airnow_file_format(self, text: str) -> List[Dict]:
|
392 |
-
"""Parse standard AirNow file format"""
|
393 |
lines = text.strip().split('\n')
|
394 |
data = []
|
395 |
|
396 |
for line in lines:
|
397 |
-
if not line.strip()
|
398 |
continue
|
399 |
-
|
400 |
-
try:
|
401 |
-
# Try pipe-delimited first
|
402 |
-
if '|' in line:
|
403 |
-
parts = line.split('|')
|
404 |
-
elif ',' in line:
|
405 |
-
parts = line.split(',')
|
406 |
-
elif '\t' in line:
|
407 |
-
parts = line.split('\t')
|
408 |
-
else:
|
409 |
-
continue
|
410 |
|
411 |
-
|
412 |
-
|
|
|
413 |
record = {
|
414 |
-
'DateObserved':
|
415 |
-
'HourObserved':
|
416 |
-
'
|
417 |
-
'
|
418 |
-
'
|
419 |
-
'
|
420 |
-
'
|
421 |
-
'
|
422 |
-
'
|
423 |
-
'
|
424 |
-
'source': 'parsed_file'
|
425 |
}
|
426 |
data.append(record)
|
427 |
except:
|
428 |
continue
|
429 |
-
|
430 |
return data
|
431 |
|
432 |
-
def
|
433 |
-
"""
|
434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
435 |
unique_data = []
|
|
|
436 |
|
437 |
-
for
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
item.get('ParameterName', ''),
|
443 |
-
item.get('SiteName', ''),
|
444 |
-
item.get('AgencyName', ''),
|
445 |
-
item.get('MonitorType', ''),
|
446 |
-
item.get('ReportingArea', ''),
|
447 |
-
item.get('StateCode', ''),
|
448 |
-
item.get('CountyCode', '')
|
449 |
-
)
|
450 |
-
|
451 |
-
if station_key not in seen_stations:
|
452 |
-
seen_stations.add(station_key)
|
453 |
-
unique_data.append(item)
|
454 |
|
|
|
455 |
return unique_data
|
456 |
-
|
457 |
-
def create_map(self, data: List[Dict]) -> str:
|
458 |
-
"""Create an interactive map with air quality data"""
|
459 |
-
if not data:
|
460 |
-
# Create a basic US map if no data
|
461 |
-
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
|
462 |
-
folium.Marker(
|
463 |
-
[39.8283, -98.5795],
|
464 |
-
popup="No data available. Please check your API key.",
|
465 |
-
icon=folium.Icon(color='red', icon='info-sign')
|
466 |
-
).add_to(m)
|
467 |
-
return m._repr_html_()
|
468 |
-
|
469 |
-
# Calculate center point of all data
|
470 |
-
lats = [item['Latitude'] for item in data if 'Latitude' in item]
|
471 |
-
lons = [item['Longitude'] for item in data if 'Longitude' in item]
|
472 |
-
|
473 |
-
if lats and lons:
|
474 |
-
center_lat = sum(lats) / len(lats)
|
475 |
-
center_lon = sum(lons) / len(lons)
|
476 |
-
else:
|
477 |
-
center_lat, center_lon = 39.8283, -98.5795 # Center of US
|
478 |
-
|
479 |
-
# Create map
|
480 |
-
m = folium.Map(location=[center_lat, center_lon], zoom_start=4)
|
481 |
-
|
482 |
-
# Add markers for each monitoring location
|
483 |
-
for item in data:
|
484 |
-
try:
|
485 |
-
lat = item.get('Latitude')
|
486 |
-
lon = item.get('Longitude')
|
487 |
-
aqi = item.get('AQI', 0)
|
488 |
-
parameter = item.get('ParameterName', 'Unknown')
|
489 |
-
area = item.get('ReportingArea', 'Unknown Area')
|
490 |
-
state = item.get('StateCode', 'Unknown')
|
491 |
-
category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi))
|
492 |
-
|
493 |
-
if lat is None or lon is None:
|
494 |
-
continue
|
495 |
-
|
496 |
-
# Get color based on AQI category
|
497 |
-
color = self.get_aqi_color(category)
|
498 |
-
|
499 |
-
# Create popup content
|
500 |
-
popup_content = f"""
|
501 |
-
<div style="width: 200px;">
|
502 |
-
<h4>{area}, {state}</h4>
|
503 |
-
<p><b>AQI:</b> {aqi} ({category})</p>
|
504 |
-
<p><b>Parameter:</b> {parameter}</p>
|
505 |
-
<p><b>Location:</b> {lat:.3f}, {lon:.3f}</p>
|
506 |
-
<p><b>Last Updated:</b> {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00</p>
|
507 |
-
</div>
|
508 |
-
"""
|
509 |
-
|
510 |
-
# Determine marker color based on AQI
|
511 |
-
if aqi <= 50:
|
512 |
-
marker_color = 'green'
|
513 |
-
elif aqi <= 100:
|
514 |
-
marker_color = 'orange' # Changed from 'yellow' to 'orange'
|
515 |
-
elif aqi <= 150:
|
516 |
-
marker_color = 'orange'
|
517 |
-
elif aqi <= 200:
|
518 |
-
marker_color = 'red'
|
519 |
-
elif aqi <= 300:
|
520 |
-
marker_color = 'purple'
|
521 |
-
else:
|
522 |
-
marker_color = 'darkred'
|
523 |
-
|
524 |
-
# Add marker
|
525 |
-
folium.Marker(
|
526 |
-
[lat, lon],
|
527 |
-
popup=folium.Popup(popup_content, max_width=250),
|
528 |
-
tooltip=f"{area}: AQI {aqi}",
|
529 |
-
icon=folium.Icon(color=marker_color, icon='cloud')
|
530 |
-
).add_to(m)
|
531 |
-
|
532 |
-
except Exception as e:
|
533 |
-
continue # Skip problematic markers
|
534 |
-
|
535 |
-
# Add legend
|
536 |
-
legend_html = """
|
537 |
-
<div style="position: fixed;
|
538 |
-
bottom: 50px; left: 50px; width: 150px; height: 180px;
|
539 |
-
background-color: white; border:2px solid grey; z-index:9999;
|
540 |
-
font-size:14px; padding: 10px">
|
541 |
-
<h4>AQI Legend</h4>
|
542 |
-
<p><i class="fa fa-circle" style="color:green"></i> Good (0-50)</p>
|
543 |
-
<p><i class="fa fa-circle" style="color:orange"></i> Moderate (51-100)</p>
|
544 |
-
<p><i class="fa fa-circle" style="color:orange"></i> Unhealthy for Sensitive (101-150)</p>
|
545 |
-
<p><i class="fa fa-circle" style="color:red"></i> Unhealthy (151-200)</p>
|
546 |
-
<p><i class="fa fa-circle" style="color:purple"></i> Very Unhealthy (201-300)</p>
|
547 |
-
<p><i class="fa fa-circle" style="color:darkred"></i> Hazardous (301+)</p>
|
548 |
-
</div>
|
549 |
-
"""
|
550 |
-
m.get_root().html.add_child(folium.Element(legend_html))
|
551 |
-
|
552 |
-
return m._repr_html_()
|
553 |
-
|
554 |
-
def create_data_table(self, data: List[Dict]) -> pd.DataFrame:
|
555 |
-
"""Create a data table from the air quality data"""
|
556 |
-
if not data:
|
557 |
-
return pd.DataFrame()
|
558 |
-
|
559 |
-
# Extract relevant columns
|
560 |
-
table_data = []
|
561 |
-
for item in data:
|
562 |
-
table_data.append({
|
563 |
-
'Reporting Area': item.get('ReportingArea', 'Unknown'),
|
564 |
-
'State': item.get('StateCode', 'Unknown'),
|
565 |
-
'AQI': item.get('AQI', 0),
|
566 |
-
'Category': item.get('Category', {}).get('Name', self.get_aqi_category(item.get('AQI', 0))),
|
567 |
-
'Parameter': item.get('ParameterName', 'Unknown'),
|
568 |
-
'Date': item.get('DateObserved', 'Unknown'),
|
569 |
-
'Hour': item.get('HourObserved', 'Unknown'),
|
570 |
-
'Latitude': item.get('Latitude', 'Unknown'),
|
571 |
-
'Longitude': item.get('Longitude', 'Unknown')
|
572 |
-
})
|
573 |
-
|
574 |
-
df = pd.DataFrame(table_data)
|
575 |
-
return df.sort_values('AQI', ascending=False)
|
576 |
-
|
577 |
-
# Initialize the mapper
|
578 |
-
mapper = AirQualityMapper()
|
579 |
|
580 |
-
#
|
581 |
-
|
582 |
-
|
583 |
-
if env_api_key:
|
584 |
-
print(f"API key starts with: {env_api_key[:8]}...")
|
585 |
-
|
586 |
-
def update_map(api_key: str):
|
587 |
-
"""Update the map with fresh air quality data"""
|
588 |
-
# Check for environment variable first, then use provided key
|
589 |
-
env_api_key = os.getenv('AIRNOW_API_KEY')
|
590 |
-
if env_api_key:
|
591 |
-
api_key = env_api_key
|
592 |
-
|
593 |
-
if not api_key.strip():
|
594 |
-
return "Please enter your AirNow API key above or set AIRNOW_API_KEY environment variable.", pd.DataFrame(), "β No API key provided"
|
595 |
-
|
596 |
-
# Fetch data
|
597 |
-
data, status = mapper.fetch_airnow_data(api_key)
|
598 |
-
|
599 |
-
# Create map
|
600 |
-
map_html = mapper.create_map(data)
|
601 |
-
|
602 |
-
# Create data table
|
603 |
-
df = mapper.create_data_table(data)
|
604 |
-
|
605 |
-
return map_html, df, status
|
606 |
-
|
607 |
-
# Create Gradio interface
|
608 |
-
with gr.Blocks(title="AirNow Air Quality Sensor Map", theme=gr.themes.Soft()) as demo:
|
609 |
-
# Check if API key is set as environment variable
|
610 |
-
env_api_key = os.getenv('AIRNOW_API_KEY')
|
611 |
-
api_key_status = "β
API key loaded from environment variable" if env_api_key else "β οΈ No environment variable set"
|
612 |
-
|
613 |
-
gr.Markdown(
|
614 |
-
f"""
|
615 |
-
# π¬οΈ AirNow Air Quality Sensor Map
|
616 |
-
|
617 |
-
**API Key Status**: {api_key_status}
|
618 |
-
|
619 |
-
This interactive map displays real-time air quality data from EPA's AirNow network of over 2,000 monitoring stations across the United States.
|
620 |
-
|
621 |
-
## How to use:
|
622 |
-
1. **API Key**: {"API key is already configured via environment variable" if env_api_key else "Enter your API key below or set AIRNOW_API_KEY environment variable"}
|
623 |
-
2. **Click "Load Air Quality Data"** to fetch current readings from 500+ monitoring stations nationwide
|
624 |
-
3. **Explore the map**: Click on markers to see detailed information about each monitoring station
|
625 |
-
|
626 |
-
## Enhanced Coverage:
|
627 |
-
- **Comprehensive Grid Search**: Covers 200+ major cities and metropolitan areas
|
628 |
-
- **Maximum Radius**: 200-mile search radius for complete regional coverage
|
629 |
-
- **Strategic Targeting**: Includes airports, universities, and industrial areas with monitors
|
630 |
-
- **Minimal Deduplication**: Preserves multiple sensors per location for maximum data
|
631 |
-
- **Lightning Fast**: 0.05-second delays for rapid data collection
|
632 |
-
|
633 |
-
**β οΈ Note**: This data is preliminary and should not be used for regulatory decisions. For official data, visit [EPA's AirData](https://www.epa.gov/outdoor-air-quality-data).
|
634 |
-
"""
|
635 |
-
)
|
636 |
-
|
637 |
-
with gr.Row():
|
638 |
-
with gr.Column(scale=3):
|
639 |
-
api_key_input = gr.Textbox(
|
640 |
-
label="AirNow API Key (Optional if environment variable is set)",
|
641 |
-
placeholder="Enter your AirNow API key here..." if not env_api_key else "Using environment variable AIRNOW_API_KEY",
|
642 |
-
type="password",
|
643 |
-
info="Get your free API key at docs.airnowapi.org" if not env_api_key else "Environment variable is being used",
|
644 |
-
value="" if not env_api_key else "Environment variable configured",
|
645 |
-
interactive=not bool(env_api_key)
|
646 |
-
)
|
647 |
-
with gr.Column(scale=1):
|
648 |
-
load_button = gr.Button("Load Air Quality Data", variant="primary", size="lg")
|
649 |
|
650 |
-
|
|
|
651 |
|
652 |
-
|
653 |
-
|
654 |
-
|
|
|
|
|
|
|
|
|
655 |
|
656 |
-
|
657 |
-
|
658 |
-
label="Air Quality Monitoring Stations",
|
659 |
-
interactive=False
|
660 |
-
)
|
661 |
-
|
662 |
-
gr.Markdown(
|
663 |
-
"""
|
664 |
-
## AQI Health Guidelines:
|
665 |
|
666 |
-
|
667 |
-
|
668 |
-
- **Unhealthy for Sensitive Groups (101-150)**: Members of sensitive groups may experience health effects
|
669 |
-
- **Unhealthy (151-200)**: Everyone may begin to experience health effects
|
670 |
-
- **Very Unhealthy (201-300)**: Health warnings of emergency conditions
|
671 |
-
- **Hazardous (301+)**: Health alert - everyone may experience serious health effects
|
672 |
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
- **Update Frequency**: Hourly observations, daily forecasts
|
677 |
|
678 |
-
|
679 |
-
|
680 |
-
- [AirNow API Documentation](https://docs.airnowapi.org/) - API documentation and registration
|
681 |
-
- [EPA AirData](https://www.epa.gov/outdoor-air-quality-data) - Official regulatory air quality data
|
682 |
-
"""
|
683 |
-
)
|
684 |
-
|
685 |
-
# Set up event handler
|
686 |
-
load_button.click(
|
687 |
-
fn=update_map,
|
688 |
-
inputs=[api_key_input],
|
689 |
-
outputs=[map_output, data_table, status_text]
|
690 |
-
)
|
691 |
-
|
692 |
-
# Launch the app
|
693 |
-
if __name__ == "__main__":
|
694 |
-
demo.launch()
|
|
|
|
|
1 |
import requests
|
|
|
|
|
|
|
|
|
|
|
2 |
import pandas as pd
|
3 |
+
from datetime import datetime, timedelta
|
4 |
+
import pytz
|
5 |
+
from typing import List, Dict
|
6 |
+
import time
|
7 |
|
8 |
+
class AirNowBulkFetcher:
    """Download and parse AirNow bulk data files.

    Files are requested from ``self.base_url`` without credentials. Every
    parser returns a list of plain dictionaries, one per accepted input line.
    """

    # Degrees of longitude covered by one hour of GMT offset (360 / 24).
    _DEGREES_PER_HOUR = 15.0
    # Placeholder coordinates used when nothing better can be derived.
    _DEFAULT_LATITUDE = 39.0
    _DEFAULT_LONGITUDE = -98.0

    def __init__(self):
        self.base_url = "https://files.airnowtech.org"

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    def _download_text(self, url: str) -> str:
        """Return the response body for *url*, or ``""`` if it is unusable.

        A response counts as usable when the status code is 200 and the body
        is not blank. Network errors (``requests.RequestException``) are left
        to the caller.
        """
        response = requests.get(url, timeout=30)
        if response.status_code == 200 and response.text.strip():
            return response.text
        return ""

    @staticmethod
    def _to_int(raw: str):
        """Return ``int(raw)``, or ``None`` when *raw* is not an integer."""
        try:
            return int(raw.strip())
        except ValueError:
            return None

    @staticmethod
    def _to_float(raw: str, default: float = 0.0) -> float:
        """Return *raw* as a finite float, or *default* when that fails."""
        try:
            number = float(raw.strip())
        except ValueError:
            return default
        # float() also accepts "nan" / "inf"; treat those as missing values.
        if number != number or number in (float("inf"), float("-inf")):
            return default
        return number

    @staticmethod
    def _deduplicate(records: List[Dict]) -> List[Dict]:
        """Return *records* without repeats, keeping first occurrences.

        The key combines the station identity (``AQSID``/``SiteName``) with
        the reporting-area identity (``ReportingArea``/``StateCode``), so
        records that only carry one of the two are still told apart.
        """
        unique = []
        seen = set()
        for record in records:
            key = (
                record.get('AQSID', ''),
                record.get('SiteName', ''),
                record.get('ReportingArea', ''),
                record.get('StateCode', ''),
                record.get('ParameterName', ''),
            )
            if key not in seen:
                seen.add(key)
                unique.append(record)
        return unique

    # ------------------------------------------------------------------
    # Downloads
    # ------------------------------------------------------------------

    def get_current_hourly_data(self) -> List[Dict]:
        """Return the records of the most recent hourly file that parses.

        Starting at the current UTC hour and going back up to five hours,
        ``HourlyData_<YYYYMMDDHH>.dat`` is requested first from
        ``airnow/today/`` and then from ``airnow/<YYYY>/<YYYYMMDD>/``. The
        first file that yields at least one record wins. Returns ``[]`` when
        no file could be used.
        """
        gmt_now = datetime.now(pytz.UTC)

        for hour_offset in range(6):  # current hour plus the previous five
            target_time = gmt_now - timedelta(hours=hour_offset)
            filename = f"HourlyData_{target_time.strftime('%Y%m%d%H')}.dat"
            date_dir = target_time.strftime('%Y/%Y%m%d')
            candidates = (
                (f"{self.base_url}/airnow/today/{filename}", False),
                (f"{self.base_url}/airnow/{date_dir}/{filename}", True),
            )

            try:
                for url, in_dated_dir in candidates:
                    print(f"Trying: {url}")
                    text = self._download_text(url)
                    if not text:
                        continue

                    if in_dated_dir:
                        print("✅ SUCCESS: Found data in dated directory")
                    else:
                        print(f"✅ SUCCESS: Found data file with {len(text.splitlines())} lines")

                    records = self.parse_hourly_data_file(text)
                    if records:
                        origin = "dated directory" if in_dated_dir else filename
                        print(f"Parsed {len(records)} station records from {origin}")
                        return records  # got data, stop trying older hours
            except requests.RequestException as e:
                print(f"❌ Error trying hour {hour_offset}: {e}")

            time.sleep(0.1)  # be nice to the server between attempts

        return []

    def get_reporting_areas_data(self) -> List[Dict]:
        """Return the parsed ``airnow/today/reportingarea.dat`` file.

        Returns ``[]`` when the request fails or the file is missing/blank.
        """
        url = f"{self.base_url}/airnow/today/reportingarea.dat"
        print(f"Trying reporting areas: {url}")

        try:
            text = self._download_text(url)
        except requests.RequestException as e:
            print(f"❌ Error getting reporting areas: {e}")
            return []

        if not text:
            return []

        print("✅ Found reporting areas file")
        return self.parse_reporting_areas_file(text)

    # ------------------------------------------------------------------
    # Parsers
    # ------------------------------------------------------------------

    def parse_hourly_data_file(self, text: str) -> List[Dict]:
        """Parse a pipe-delimited hourly data file into record dictionaries.

        Lines with fewer than nine ``|``-separated fields are skipped. The
        fields are read, in order, as date, time, AQS id, site name, GMT
        offset, parameter name, units, value and data source.

        An unparseable GMT offset or value becomes ``0`` instead of
        discarding the whole line. The file carries no coordinates, so
        ``Latitude``/``Longitude`` are coarse placeholders only.
        """
        # splitlines() also removes the "\r" of CRLF files, which would
        # otherwise end up inside the last field.
        lines = text.strip().splitlines()
        data = []

        print(f"🔧 Parsing {len(lines)} lines...")

        for line in lines:
            if not line.strip():
                continue

            fields = line.split('|')
            if len(fields) < 9:  # minimum required fields
                continue

            parsed_offset = self._to_int(fields[4])
            gmt_offset = parsed_offset if parsed_offset is not None else 0

            record = {
                'DateObserved': fields[0],
                'HourObserved': fields[1],
                'AQSID': fields[2],
                'SiteName': fields[3],
                'GMTOffset': gmt_offset,
                'ParameterName': fields[5],
                'ReportingUnits': fields[6],
                'Value': self._to_float(fields[7]),
                'DataSource': fields[8],
                'source': 'hourly_bulk_file',
            }

            # Very rough placement: fixed latitude, longitude taken as the
            # central meridian of the site's time zone (15 degrees per hour).
            # An exact position needs a separate site-location lookup.
            record['Latitude'] = self._DEFAULT_LATITUDE
            if parsed_offset is None:
                record['Longitude'] = self._DEFAULT_LONGITUDE
            else:
                record['Longitude'] = parsed_offset * self._DEGREES_PER_HOUR

            record['AQI'] = self.calculate_aqi(record['ParameterName'], record['Value'])
            data.append(record)

        print(f"✅ Successfully parsed {len(data)} records")
        return data

    def parse_reporting_areas_file(self, text: str) -> List[Dict]:
        """Parse a pipe-delimited reporting-areas file into record dictionaries.

        Lines with fewer than eight fields, or with a non-numeric latitude or
        longitude, are skipped. The AQI (field 9) and category (field 10) are
        optional and default to ``0`` and ``''``.

        NOTE(review): the column positions used here (area at index 3, state
        at 4, coordinates at 5/6, parameter at 7) are kept from the previous
        implementation and have not been checked against the official
        reporting-area file layout - confirm before relying on the output.
        """
        data = []

        for line in text.strip().splitlines():
            if not line.strip():
                continue

            fields = line.split('|')
            if len(fields) < 8:
                continue

            try:
                latitude = float(fields[5]) if fields[5] else 0
                longitude = float(fields[6]) if fields[6] else 0
            except ValueError:
                continue  # coordinates are unusable, drop the line

            # Only plain non-negative integers count as an AQI value.
            raw_aqi = fields[8].strip() if len(fields) > 8 else ''
            parsed_aqi = self._to_int(raw_aqi) if raw_aqi.isdigit() else None

            data.append({
                'DateObserved': fields[0],
                'HourObserved': fields[1],
                'ReportingArea': fields[3],
                'StateCode': fields[4],
                'Latitude': latitude,
                'Longitude': longitude,
                'ParameterName': fields[7],
                'AQI': parsed_aqi if parsed_aqi is not None else 0,
                'CategoryName': fields[9] if len(fields) > 9 else '',
                'source': 'reporting_areas_file',
            })

        return data

    # ------------------------------------------------------------------
    # AQI
    # ------------------------------------------------------------------

    def calculate_aqi(self, parameter: str, value: float) -> int:
        """Return a simplified AQI for ``OZONE``, ``PM2.5`` or ``PM10``.

        The concentration is mapped piecewise-linearly onto the index and
        truncated to an integer; the top segment is capped at 400. Any other
        parameter, a non-positive ozone value or a negative particulate value
        yields ``0``.

        NOTE(review): the breakpoints presumably expect ozone in ppb and
        particulates in ug/m3 - confirm against the units in the data file.
        """
        if parameter == 'OZONE' and value > 0:
            # Simplified ozone AQI calculation (8-hour average basis)
            if value <= 54:
                return int((50/54) * value)
            if value <= 70:
                return int(51 + (49/16) * (value - 54))
            if value <= 85:
                return int(101 + (49/15) * (value - 70))
            if value <= 105:
                return int(151 + (49/20) * (value - 85))
            return int(201 + (199/95) * min(value - 105, 95))

        if parameter == 'PM2.5' and value >= 0:
            if value <= 12.0:
                return int((50/12) * value)
            if value <= 35.4:
                return int(51 + (49/23.4) * (value - 12))
            if value <= 55.4:
                return int(101 + (49/20) * (value - 35.4))
            if value <= 150.4:
                return int(151 + (49/95) * (value - 55.4))
            return int(201 + (199/149.6) * min(value - 150.4, 149.6))

        if parameter == 'PM10' and value >= 0:
            if value <= 54:
                return int((50/54) * value)
            if value <= 154:
                return int(51 + (49/100) * (value - 54))
            if value <= 254:
                return int(101 + (49/100) * (value - 154))
            if value <= 354:
                return int(151 + (49/100) * (value - 254))
            return int(201 + (199/146) * min(value - 354, 146))

        return 0  # default for non-AQI parameters

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def get_all_stations(self) -> List[Dict]:
        """Return de-duplicated records from the hourly and reporting-area files."""
        print("Fetching AirNow bulk station data...")

        all_data = []

        print("\nGetting hourly monitoring data...")
        all_data.extend(self.get_current_hourly_data())

        print("\nGetting reporting areas data...")
        all_data.extend(self.get_reporting_areas_data())

        print(f"\n🔧 Deduplicating {len(all_data)} records...")
        unique_data = self._deduplicate(all_data)

        print(f"✅ Final result: {len(unique_data)} unique monitoring stations")
        return unique_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
+
# Usage example
|
216 |
+
if __name__ == "__main__":
    # Example run: fetch everything, print a short summary and save a CSV.
    fetcher = AirNowBulkFetcher()

    stations_data = fetcher.get_all_stations()

    if stations_data:
        # Convert to DataFrame for easy viewing
        df = pd.DataFrame(stations_data)

        # Hourly and reporting-area records do not share all columns, so only
        # touch the station-specific ones when they are actually present.
        site_count = df['SiteName'].nunique() if 'SiteName' in df.columns else 0
        preview_columns = [
            column
            for column in ('SiteName', 'ParameterName', 'Value', 'AQI', 'DataSource')
            if column in df.columns
        ]

        print("\nData Summary:")
        print(f"Total stations: {len(df)}")
        print(f"Parameters monitored: {df['ParameterName'].nunique()}")
        print(f"Unique sites: {site_count}")

        print("\nParameter breakdown:")
        print(df['ParameterName'].value_counts().head(10))

        print("\nSample records:")
        print(df[preview_columns].head(10))

        # Save to CSV
        df.to_csv('airnow_stations_data.csv', index=False)
        print("\n💾 Data saved to 'airnow_stations_data.csv'")

    else:
        print("❌ No data retrieved. Check the AirNow file servers.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|