nakas committed on
Commit
b0c2c45
·
verified ·
1 Parent(s): b21ff10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -637
app.py CHANGED
@@ -1,694 +1,241 @@
1
- import gradio as gr
2
  import requests
3
- import folium
4
- import json
5
- import time
6
- import os
7
- from typing import Dict, List, Optional, Tuple
8
  import pandas as pd
 
 
 
 
9
 
10
- class AirQualityMapper:
11
- """Class to handle AirNow API interactions and map generation"""
12
 
13
  def __init__(self):
14
- self.base_url = "https://www.airnowapi.org"
15
- self.aqi_colors = {
16
- "Good": "#00E400",
17
- "Moderate": "#FFFF00",
18
- "Unhealthy for Sensitive Groups": "#FF7E00",
19
- "Unhealthy": "#FF0000",
20
- "Very Unhealthy": "#8F3F97",
21
- "Hazardous": "#7E0023"
22
- }
23
- self.aqi_ranges = {
24
- (0, 50): "Good",
25
- (51, 100): "Moderate",
26
- (101, 150): "Unhealthy for Sensitive Groups",
27
- (151, 200): "Unhealthy",
28
- (201, 300): "Very Unhealthy",
29
- (301, 500): "Hazardous"
30
- }
31
-
32
- def get_aqi_category(self, aqi_value: int) -> str:
33
- """Get AQI category based on value"""
34
- for (min_val, max_val), category in self.aqi_ranges.items():
35
- if min_val <= aqi_value <= max_val:
36
- return category
37
- return "Unknown"
38
-
39
- def get_aqi_color(self, category: str) -> str:
40
- """Get color for AQI category"""
41
- return self.aqi_colors.get(category, "#808080")
42
-
43
- def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
44
- """
45
- DIRECT ACCESS: Get ALL monitoring stations from hourly data files
46
- Returns: (data_list, status_message)
47
- """
48
- if not api_key or api_key.strip() == "":
49
- return [], "❌ Please enter a valid AirNow API key"
50
-
51
- print(f"🎯 DIRECT FILE ACCESS: Grabbing hourly data files with ALL monitoring sites...")
52
 
53
- try:
54
- all_data = []
55
-
56
- # STRATEGY 1: Access hourly data files directly
57
- print("πŸ“ STRATEGY 1: Accessing bulk hourly data files...")
58
- hourly_data = self.get_hourly_data_files(api_key)
59
- all_data.extend(hourly_data)
60
- print(f"Hourly files found: {len(hourly_data)} station records")
61
-
62
- # STRATEGY 2: Try file products mentioned in research
63
- print("πŸ“ STRATEGY 2: Accessing file products and bulk endpoints...")
64
- file_data = self.get_file_products(api_key)
65
- all_data.extend(file_data)
66
- print(f"File products found: {len(file_data)} additional records")
67
-
68
- # STRATEGY 3: Try data dumps and bulk exports
69
- print("πŸ“ STRATEGY 3: Accessing data dumps and exports...")
70
- export_data = self.get_bulk_exports(api_key)
71
- all_data.extend(export_data)
72
- print(f"Bulk exports found: {len(export_data)} additional records")
73
-
74
- # STRATEGY 4: Parse any CSV/XML/JSON data files
75
- print("πŸ“ STRATEGY 4: Parsing structured data files...")
76
- parsed_data = self.parse_structured_files(api_key)
77
- all_data.extend(parsed_data)
78
- print(f"Parsed files found: {len(parsed_data)} additional records")
79
-
80
- print(f"🎯 Total raw data from files: {len(all_data)} records")
81
-
82
- if not all_data:
83
- return [], f"⚠️ No bulk data files accessible with this API key."
84
-
85
- # Comprehensive deduplication
86
- unique_data = self.comprehensive_deduplication(all_data)
87
-
88
- print(f"πŸ† FINAL RESULT: {len(unique_data)} unique monitoring stations from bulk files")
89
-
90
- return unique_data, f"🎯 BULK FILE ACCESS: Found {len(unique_data)} monitoring stations from hourly data files"
91
-
92
- except Exception as e:
93
- print(f"File access error: {str(e)}")
94
- return [], f"❌ Error accessing bulk files: {str(e)}"
95
-
96
- def get_hourly_data_files(self, api_key: str) -> List[Dict]:
97
- """Access hourly data files containing all monitoring sites"""
98
- data = []
99
 
100
- # Try various hourly data file endpoints
101
- hourly_endpoints = [
102
- f"{self.base_url}/files/data/",
103
- f"{self.base_url}/files/hourly/",
104
- f"{self.base_url}/files/",
105
- f"{self.base_url}/aq/data/hourly/",
106
- f"{self.base_url}/aq/files/",
107
- f"{self.base_url}/data/",
108
- f"{self.base_url}/hourly/",
109
- # Try specific file formats
110
- f"{self.base_url}/files/HourlyData.dat",
111
- f"{self.base_url}/files/MonitoringSites.dat",
112
- f"{self.base_url}/files/reportingarea.dat",
113
- f"{self.base_url}/files/HourlyData.csv",
114
- f"{self.base_url}/files/stations.csv",
115
- f"{self.base_url}/files/current.csv",
116
- f"{self.base_url}/files/sites.xml",
117
- f"{self.base_url}/files/data.json",
118
- ]
119
 
120
- for endpoint in hourly_endpoints:
 
 
121
  try:
122
- print(f"Trying: {endpoint}")
 
123
 
124
- # Try with API key parameter
125
- response = requests.get(endpoint,
126
- params={"API_KEY": api_key, "format": "json"},
127
- timeout=30)
128
 
129
- if response.status_code == 200:
130
- print(f"βœ… SUCCESS: {endpoint} returned data")
131
-
132
- # Try to parse as JSON first
133
- try:
134
- json_data = response.json()
135
- if isinstance(json_data, list):
136
- for record in json_data:
137
- record['source'] = 'hourly_file'
138
- data.extend(json_data)
139
- print(f"Parsed JSON: {len(json_data)} records")
140
- continue
141
- except:
142
- pass
143
-
144
- # Try to parse as CSV
145
- try:
146
- csv_data = self.parse_csv_response(response.text)
147
- if csv_data:
148
- data.extend(csv_data)
149
- print(f"Parsed CSV: {len(csv_data)} records")
150
- continue
151
- except:
152
- pass
153
 
154
- # Try to parse as pipe-delimited (common AirNow format)
155
- try:
156
- pipe_data = self.parse_pipe_delimited(response.text)
157
- if pipe_data:
158
- data.extend(pipe_data)
159
- print(f"Parsed pipe-delimited: {len(pipe_data)} records")
160
- continue
161
- except:
162
- pass
163
 
164
- print(f"Could not parse response format from {endpoint}")
165
- else:
166
- print(f"❌ {endpoint}: HTTP {response.status_code}")
167
 
168
- time.sleep(0.1)
 
 
 
169
 
 
 
 
 
 
 
 
 
 
170
  except Exception as e:
171
- print(f"Error accessing {endpoint}: {str(e)}")
172
  continue
 
 
173
 
174
  return data
175
 
176
- def get_file_products(self, api_key: str) -> List[Dict]:
177
- """Access file products mentioned in AirNow documentation"""
178
- data = []
179
-
180
- # File products mentioned in research and documentation
181
- file_products = [
182
- # From airnowtech.org - these often have bulk data
183
- "https://files.airnowtech.org/airnow/today/HourlyData.dat",
184
- "https://files.airnowtech.org/airnow/today/daily_summary.dat",
185
- "https://files.airnowtech.org/airnow/today/reportingarea.dat",
186
- "https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat",
187
- "https://files.airnowtech.org/airnow/HourlyData.dat",
188
- "https://files.airnowtech.org/HourlyData.dat",
189
- "https://files.airnowtech.org/reportingarea.dat",
190
- "https://files.airnowtech.org/airnow/today/HourlyAQObs.dat",
191
- "https://files.airnowtech.org/airnow/today/HourlyAQForecast.dat",
192
 
193
- # Try direct API file endpoints
194
- f"{self.base_url}/files/data/HourlyData.dat",
195
- f"{self.base_url}/files/HourlyData.dat",
196
- f"{self.base_url}/files/today/HourlyData.dat",
197
- f"{self.base_url}/files/reportingarea.dat",
198
- f"{self.base_url}/files/sites.dat",
199
- f"{self.base_url}/files/monitors.dat",
200
- ]
201
-
202
- for file_url in file_products:
203
- try:
204
- print(f"Trying file: {file_url}")
205
-
206
- # Try with and without API key
207
- for use_api_key in [True, False]:
208
- try:
209
- if use_api_key and "airnowapi.org" in file_url:
210
- params = {"API_KEY": api_key}
211
- else:
212
- params = {}
213
-
214
- response = requests.get(file_url, params=params, timeout=30)
215
-
216
- if response.status_code == 200 and response.text.strip():
217
- print(f"βœ… File found: {file_url}")
218
-
219
- # Parse the file content
220
- file_data = self.parse_airnow_file_format(response.text)
221
- if file_data:
222
- for record in file_data:
223
- record['source'] = 'file_product'
224
- data.extend(file_data)
225
- print(f"Parsed file: {len(file_data)} records")
226
- break # Success, move to next file
227
-
228
- except Exception as e:
229
- continue
230
-
231
- time.sleep(0.1)
232
-
233
- except Exception as e:
234
- continue
235
 
236
- return data
237
 
238
- def get_bulk_exports(self, api_key: str) -> List[Dict]:
239
- """Try bulk export endpoints"""
 
240
  data = []
241
 
242
- export_endpoints = [
243
- f"{self.base_url}/aq/data/",
244
- f"{self.base_url}/aq/observation/",
245
- f"{self.base_url}/aq/monitoring/",
246
- f"{self.base_url}/export/",
247
- f"{self.base_url}/bulk/",
248
- f"{self.base_url}/download/",
249
- f"{self.base_url}/api/data/",
250
- ]
251
 
252
- for endpoint in export_endpoints:
 
 
 
253
  try:
254
- params = {
255
- "format": "json",
256
- "API_KEY": api_key,
257
- "datatype": "monitoring",
258
- "export": "all"
259
- }
260
 
261
- response = requests.get(endpoint, params=params, timeout=20)
262
- if response.status_code == 200:
263
- try:
264
- export_data = response.json()
265
- if export_data and isinstance(export_data, list):
266
- for record in export_data:
267
- record['source'] = 'bulk_export'
268
- data.extend(export_data)
269
- print(f"Bulk export: {len(export_data)} records")
270
- except:
271
- pass
272
- except:
273
- continue
274
-
275
- return data
276
-
277
- def parse_structured_files(self, api_key: str) -> List[Dict]:
278
- """Try to get structured data files in various formats"""
279
- data = []
280
-
281
- # Try current hour data endpoint (should have all active stations)
282
- try:
283
- from datetime import datetime
284
- import pytz
285
-
286
- # Get current hour in Eastern Time (AirNow's timezone)
287
- eastern = pytz.timezone('US/Eastern')
288
- now = datetime.now(eastern)
289
-
290
- # Try current hour endpoint
291
- hour_endpoints = [
292
- f"{self.base_url}/aq/observation/zipCode/current/",
293
- f"{self.base_url}/aq/data/monitoring/current/",
294
- f"{self.base_url}/aq/observation/latLong/current/",
295
- ]
296
-
297
- # Try to get data for entire country using bounding box
298
- usa_bbox = {
299
- "minLat": 18.0, # Southern tip of Hawaii
300
- "maxLat": 72.0, # Northern Alaska
301
- "minLon": -180.0, # Western Alaska
302
- "maxLon": -65.0 # Eastern Maine
303
- }
304
-
305
- for endpoint in hour_endpoints:
306
- try:
307
- if "zipCode" in endpoint:
308
- # Use major ZIP codes to get broad coverage
309
- major_zips = ["10001", "90210", "60601", "77001", "33101", "85001", "98101"]
310
- for zipcode in major_zips:
311
- params = {
312
- "format": "application/json",
313
- "zipCode": zipcode,
314
- "distance": 200, # Max distance
315
- "API_KEY": api_key
316
- }
317
- response = requests.get(endpoint, params=params, timeout=15)
318
- if response.status_code == 200:
319
- zip_data = response.json()
320
- if zip_data:
321
- data.extend(zip_data)
322
 
323
- elif "latLong" in endpoint:
324
- # Use center of US
325
- params = {
326
- "format": "application/json",
327
- "latitude": 39.8283,
328
- "longitude": -98.5795,
329
- "distance": 2000, # Very large radius
330
- "API_KEY": api_key
331
- }
332
- response = requests.get(endpoint, params=params, timeout=15)
333
- if response.status_code == 200:
334
- center_data = response.json()
335
- if center_data:
336
- data.extend(center_data)
337
 
338
- else:
339
- # Try with bounding box if supported
340
- params = {
341
- "format": "application/json",
342
- "API_KEY": api_key,
343
- **usa_bbox
344
- }
345
- response = requests.get(endpoint, params=params, timeout=15)
346
- if response.status_code == 200:
347
- bbox_data = response.json()
348
- if bbox_data:
349
- data.extend(bbox_data)
350
-
351
- except Exception as e:
352
- continue
353
-
354
- except Exception as e:
355
- print(f"Error in structured files: {str(e)}")
356
 
 
357
  return data
358
 
359
- def parse_csv_response(self, text: str) -> List[Dict]:
360
- """Parse CSV format response"""
361
- import csv
362
- from io import StringIO
363
-
364
- try:
365
- reader = csv.DictReader(StringIO(text))
366
- return [row for row in reader]
367
- except:
368
- return []
369
-
370
- def parse_pipe_delimited(self, text: str) -> List[Dict]:
371
- """Parse pipe-delimited format (common in AirNow files)"""
372
- lines = text.strip().split('\n')
373
- if len(lines) < 2:
374
- return []
375
-
376
- try:
377
- # First line might be headers
378
- headers = lines[0].split('|')
379
- data = []
380
-
381
- for line in lines[1:]:
382
- values = line.split('|')
383
- if len(values) == len(headers):
384
- record = dict(zip(headers, values))
385
- data.append(record)
386
-
387
- return data
388
- except:
389
- return []
390
-
391
- def parse_airnow_file_format(self, text: str) -> List[Dict]:
392
- """Parse standard AirNow file format"""
393
  lines = text.strip().split('\n')
394
  data = []
395
 
396
  for line in lines:
397
- if not line.strip() or line.startswith('#'):
398
  continue
399
-
400
- try:
401
- # Try pipe-delimited first
402
- if '|' in line:
403
- parts = line.split('|')
404
- elif ',' in line:
405
- parts = line.split(',')
406
- elif '\t' in line:
407
- parts = line.split('\t')
408
- else:
409
- continue
410
 
411
- # Create record based on typical AirNow format
412
- if len(parts) >= 8:
 
413
  record = {
414
- 'DateObserved': parts[0] if len(parts) > 0 else '',
415
- 'HourObserved': parts[1] if len(parts) > 1 else '',
416
- 'LocalTimeZone': parts[2] if len(parts) > 2 else '',
417
- 'ReportingArea': parts[3] if len(parts) > 3 else '',
418
- 'StateCode': parts[4] if len(parts) > 4 else '',
419
- 'Latitude': float(parts[5]) if len(parts) > 5 and parts[5] else 0,
420
- 'Longitude': float(parts[6]) if len(parts) > 6 and parts[6] else 0,
421
- 'ParameterName': parts[7] if len(parts) > 7 else '',
422
- 'AQI': int(parts[8]) if len(parts) > 8 and parts[8].isdigit() else 0,
423
- 'CategoryName': parts[9] if len(parts) > 9 else '',
424
- 'source': 'parsed_file'
425
  }
426
  data.append(record)
427
  except:
428
  continue
429
-
430
  return data
431
 
432
- def comprehensive_deduplication(self, data: List[Dict]) -> List[Dict]:
433
- """Comprehensive deduplication preserving maximum unique stations"""
434
- seen_stations = set()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  unique_data = []
 
436
 
437
- for item in data:
438
- # Create ultra-specific key to preserve different monitor types
439
- station_key = (
440
- round(item.get('Latitude', 0), 8), # Very high precision
441
- round(item.get('Longitude', 0), 8),
442
- item.get('ParameterName', ''),
443
- item.get('SiteName', ''),
444
- item.get('AgencyName', ''),
445
- item.get('MonitorType', ''),
446
- item.get('ReportingArea', ''),
447
- item.get('StateCode', ''),
448
- item.get('CountyCode', '')
449
- )
450
-
451
- if station_key not in seen_stations:
452
- seen_stations.add(station_key)
453
- unique_data.append(item)
454
 
 
455
  return unique_data
456
-
457
- def create_map(self, data: List[Dict]) -> str:
458
- """Create an interactive map with air quality data"""
459
- if not data:
460
- # Create a basic US map if no data
461
- m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
462
- folium.Marker(
463
- [39.8283, -98.5795],
464
- popup="No data available. Please check your API key.",
465
- icon=folium.Icon(color='red', icon='info-sign')
466
- ).add_to(m)
467
- return m._repr_html_()
468
-
469
- # Calculate center point of all data
470
- lats = [item['Latitude'] for item in data if 'Latitude' in item]
471
- lons = [item['Longitude'] for item in data if 'Longitude' in item]
472
-
473
- if lats and lons:
474
- center_lat = sum(lats) / len(lats)
475
- center_lon = sum(lons) / len(lons)
476
- else:
477
- center_lat, center_lon = 39.8283, -98.5795 # Center of US
478
-
479
- # Create map
480
- m = folium.Map(location=[center_lat, center_lon], zoom_start=4)
481
-
482
- # Add markers for each monitoring location
483
- for item in data:
484
- try:
485
- lat = item.get('Latitude')
486
- lon = item.get('Longitude')
487
- aqi = item.get('AQI', 0)
488
- parameter = item.get('ParameterName', 'Unknown')
489
- area = item.get('ReportingArea', 'Unknown Area')
490
- state = item.get('StateCode', 'Unknown')
491
- category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi))
492
-
493
- if lat is None or lon is None:
494
- continue
495
-
496
- # Get color based on AQI category
497
- color = self.get_aqi_color(category)
498
-
499
- # Create popup content
500
- popup_content = f"""
501
- <div style="width: 200px;">
502
- <h4>{area}, {state}</h4>
503
- <p><b>AQI:</b> {aqi} ({category})</p>
504
- <p><b>Parameter:</b> {parameter}</p>
505
- <p><b>Location:</b> {lat:.3f}, {lon:.3f}</p>
506
- <p><b>Last Updated:</b> {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00</p>
507
- </div>
508
- """
509
-
510
- # Determine marker color based on AQI
511
- if aqi <= 50:
512
- marker_color = 'green'
513
- elif aqi <= 100:
514
- marker_color = 'orange' # Changed from 'yellow' to 'orange'
515
- elif aqi <= 150:
516
- marker_color = 'orange'
517
- elif aqi <= 200:
518
- marker_color = 'red'
519
- elif aqi <= 300:
520
- marker_color = 'purple'
521
- else:
522
- marker_color = 'darkred'
523
-
524
- # Add marker
525
- folium.Marker(
526
- [lat, lon],
527
- popup=folium.Popup(popup_content, max_width=250),
528
- tooltip=f"{area}: AQI {aqi}",
529
- icon=folium.Icon(color=marker_color, icon='cloud')
530
- ).add_to(m)
531
-
532
- except Exception as e:
533
- continue # Skip problematic markers
534
-
535
- # Add legend
536
- legend_html = """
537
- <div style="position: fixed;
538
- bottom: 50px; left: 50px; width: 150px; height: 180px;
539
- background-color: white; border:2px solid grey; z-index:9999;
540
- font-size:14px; padding: 10px">
541
- <h4>AQI Legend</h4>
542
- <p><i class="fa fa-circle" style="color:green"></i> Good (0-50)</p>
543
- <p><i class="fa fa-circle" style="color:orange"></i> Moderate (51-100)</p>
544
- <p><i class="fa fa-circle" style="color:orange"></i> Unhealthy for Sensitive (101-150)</p>
545
- <p><i class="fa fa-circle" style="color:red"></i> Unhealthy (151-200)</p>
546
- <p><i class="fa fa-circle" style="color:purple"></i> Very Unhealthy (201-300)</p>
547
- <p><i class="fa fa-circle" style="color:darkred"></i> Hazardous (301+)</p>
548
- </div>
549
- """
550
- m.get_root().html.add_child(folium.Element(legend_html))
551
-
552
- return m._repr_html_()
553
-
554
- def create_data_table(self, data: List[Dict]) -> pd.DataFrame:
555
- """Create a data table from the air quality data"""
556
- if not data:
557
- return pd.DataFrame()
558
-
559
- # Extract relevant columns
560
- table_data = []
561
- for item in data:
562
- table_data.append({
563
- 'Reporting Area': item.get('ReportingArea', 'Unknown'),
564
- 'State': item.get('StateCode', 'Unknown'),
565
- 'AQI': item.get('AQI', 0),
566
- 'Category': item.get('Category', {}).get('Name', self.get_aqi_category(item.get('AQI', 0))),
567
- 'Parameter': item.get('ParameterName', 'Unknown'),
568
- 'Date': item.get('DateObserved', 'Unknown'),
569
- 'Hour': item.get('HourObserved', 'Unknown'),
570
- 'Latitude': item.get('Latitude', 'Unknown'),
571
- 'Longitude': item.get('Longitude', 'Unknown')
572
- })
573
-
574
- df = pd.DataFrame(table_data)
575
- return df.sort_values('AQI', ascending=False)
576
-
577
- # Initialize the mapper
578
- mapper = AirQualityMapper()
579
 
580
- # Check environment variable on startup
581
- env_api_key = os.getenv('AIRNOW_API_KEY')
582
- print(f"Environment variable AIRNOW_API_KEY: {'SET' if env_api_key else 'NOT SET'}")
583
- if env_api_key:
584
- print(f"API key starts with: {env_api_key[:8]}...")
585
-
586
- def update_map(api_key: str):
587
- """Update the map with fresh air quality data"""
588
- # Check for environment variable first, then use provided key
589
- env_api_key = os.getenv('AIRNOW_API_KEY')
590
- if env_api_key:
591
- api_key = env_api_key
592
-
593
- if not api_key.strip():
594
- return "Please enter your AirNow API key above or set AIRNOW_API_KEY environment variable.", pd.DataFrame(), "❌ No API key provided"
595
-
596
- # Fetch data
597
- data, status = mapper.fetch_airnow_data(api_key)
598
-
599
- # Create map
600
- map_html = mapper.create_map(data)
601
-
602
- # Create data table
603
- df = mapper.create_data_table(data)
604
-
605
- return map_html, df, status
606
-
607
- # Create Gradio interface
608
- with gr.Blocks(title="AirNow Air Quality Sensor Map", theme=gr.themes.Soft()) as demo:
609
- # Check if API key is set as environment variable
610
- env_api_key = os.getenv('AIRNOW_API_KEY')
611
- api_key_status = "βœ… API key loaded from environment variable" if env_api_key else "⚠️ No environment variable set"
612
-
613
- gr.Markdown(
614
- f"""
615
- # 🌬️ AirNow Air Quality Sensor Map
616
-
617
- **API Key Status**: {api_key_status}
618
-
619
- This interactive map displays real-time air quality data from EPA's AirNow network of over 2,000 monitoring stations across the United States.
620
-
621
- ## How to use:
622
- 1. **API Key**: {"API key is already configured via environment variable" if env_api_key else "Enter your API key below or set AIRNOW_API_KEY environment variable"}
623
- 2. **Click "Load Air Quality Data"** to fetch current readings from 500+ monitoring stations nationwide
624
- 3. **Explore the map**: Click on markers to see detailed information about each monitoring station
625
-
626
- ## Enhanced Coverage:
627
- - **Comprehensive Grid Search**: Covers 200+ major cities and metropolitan areas
628
- - **Maximum Radius**: 200-mile search radius for complete regional coverage
629
- - **Strategic Targeting**: Includes airports, universities, and industrial areas with monitors
630
- - **Minimal Deduplication**: Preserves multiple sensors per location for maximum data
631
- - **Lightning Fast**: 0.05-second delays for rapid data collection
632
-
633
- **⚠️ Note**: This data is preliminary and should not be used for regulatory decisions. For official data, visit [EPA's AirData](https://www.epa.gov/outdoor-air-quality-data).
634
- """
635
- )
636
-
637
- with gr.Row():
638
- with gr.Column(scale=3):
639
- api_key_input = gr.Textbox(
640
- label="AirNow API Key (Optional if environment variable is set)",
641
- placeholder="Enter your AirNow API key here..." if not env_api_key else "Using environment variable AIRNOW_API_KEY",
642
- type="password",
643
- info="Get your free API key at docs.airnowapi.org" if not env_api_key else "Environment variable is being used",
644
- value="" if not env_api_key else "Environment variable configured",
645
- interactive=not bool(env_api_key)
646
- )
647
- with gr.Column(scale=1):
648
- load_button = gr.Button("Load Air Quality Data", variant="primary", size="lg")
649
 
650
- status_text = gr.Markdown("Click 'Load Air Quality Data' to begin." if env_api_key else "Enter your API key and click 'Load Air Quality Data' to begin.")
 
651
 
652
- with gr.Tabs():
653
- with gr.TabItem("Interactive Map"):
654
- map_output = gr.HTML(label="Air Quality Map")
 
 
 
 
655
 
656
- with gr.TabItem("Data Table"):
657
- data_table = gr.Dataframe(
658
- label="Air Quality Monitoring Stations",
659
- interactive=False
660
- )
661
-
662
- gr.Markdown(
663
- """
664
- ## AQI Health Guidelines:
665
 
666
- - **Good (0-50)**: Air quality is satisfactory for everyone
667
- - **Moderate (51-100)**: Air quality is acceptable for most people
668
- - **Unhealthy for Sensitive Groups (101-150)**: Members of sensitive groups may experience health effects
669
- - **Unhealthy (151-200)**: Everyone may begin to experience health effects
670
- - **Very Unhealthy (201-300)**: Health warnings of emergency conditions
671
- - **Hazardous (301+)**: Health alert - everyone may experience serious health effects
672
 
673
- ## Data Sources:
674
- - **AirNow API**: Real-time air quality data from EPA's monitoring network
675
- - **Monitoring Agencies**: 120+ local, state, tribal, and federal government agencies
676
- - **Update Frequency**: Hourly observations, daily forecasts
677
 
678
- ## Links:
679
- - [AirNow.gov](https://www.airnow.gov) - Official air quality information
680
- - [AirNow API Documentation](https://docs.airnowapi.org/) - API documentation and registration
681
- - [EPA AirData](https://www.epa.gov/outdoor-air-quality-data) - Official regulatory air quality data
682
- """
683
- )
684
-
685
- # Set up event handler
686
- load_button.click(
687
- fn=update_map,
688
- inputs=[api_key_input],
689
- outputs=[map_output, data_table, status_text]
690
- )
691
-
692
- # Launch the app
693
- if __name__ == "__main__":
694
- demo.launch()
 
 
1
  import requests
 
 
 
 
 
2
  import pandas as pd
3
+ from datetime import datetime, timedelta
4
+ import pytz
5
+ from typing import List, Dict
6
+ import time
7
 
8
class AirNowBulkFetcher:
    """Fetch and parse bulk AirNow data files from the AirNow file server.

    Two pipe-delimited products are read:

    * ``HourlyData_YYYYMMDDHH.dat`` - one row per site and parameter.
    * ``reportingarea.dat`` - one row per reporting area and pollutant.

    All records are returned as plain dictionaries so they can be fed
    straight into ``pandas.DataFrame``.
    """

    # Fallback position (roughly the centre of the contiguous US) used when
    # a record carries no usable location information.
    DEFAULT_LATITUDE = 39.0
    DEFAULT_LONGITUDE = -98.0

    # Piecewise-linear AQI tiers per parameter. Each tier is
    # (upper_conc, lower_conc, conc_span, lower_aqi, aqi_span); upper bounds
    # and spans are spelled out as literals to avoid float subtraction drift.
    # The two top tiers follow the EPA AQI breakpoint table (Very Unhealthy
    # 201-300, Hazardous 301-500); the 8-hour ozone scale stops at 300.
    # NOTE(review): PM2.5 uses the pre-2024 breakpoints (12.0 / 150.4) that
    # this file already relied on - confirm whether the 2024 table is wanted.
    AQI_TIERS = {
        'OZONE': (
            (54, 0, 54, 0, 50),
            (70, 54, 16, 51, 49),
            (85, 70, 15, 101, 49),
            (105, 85, 20, 151, 49),
            (200, 105, 95, 201, 99),
        ),
        'PM2.5': (
            (12.0, 0, 12, 0, 50),
            (35.4, 12, 23.4, 51, 49),
            (55.4, 35.4, 20, 101, 49),
            (150.4, 55.4, 95, 151, 49),
            (250.4, 150.4, 100, 201, 99),
            (500.4, 250.4, 250, 301, 199),
        ),
        'PM10': (
            (54, 0, 54, 0, 50),
            (154, 54, 100, 51, 49),
            (254, 154, 100, 101, 49),
            (354, 254, 100, 151, 49),
            (424, 354, 70, 201, 99),
            (604, 424, 180, 301, 199),
        ),
    }

    def __init__(self, base_url: str = "https://files.airnowtech.org", timeout: float = 30):
        """Create a fetcher.

        Args:
            base_url: Root URL of the AirNow file server.
            timeout: Per-request timeout in seconds.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    # ------------------------------------------------------------------
    # Small parsing helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_int(text):
        """Return ``int(text)`` or ``None`` when *text* is not an integer."""
        try:
            return int(text.strip())
        except ValueError:
            return None

    @staticmethod
    def _parse_float(text):
        """Return ``float(text)`` or ``None`` when *text* is not a finite number."""
        try:
            number = float(text.strip())
        except ValueError:
            return None
        # Reject NaN / infinities: they would poison later AQI arithmetic.
        if number != number or number in (float('inf'), float('-inf')):
            return None
        return number

    def _estimate_longitude(self, gmt_offset) -> float:
        """Return a very rough longitude for a whole-hour GMT offset.

        Each hour of offset corresponds to 15 degrees of longitude, so the
        central meridian of the zone is ``15 * offset``. The result is
        wrapped into [-180, 180). ``None`` yields the default longitude.
        """
        if gmt_offset is None:
            return self.DEFAULT_LONGITUDE
        return (15.0 * gmt_offset + 180.0) % 360.0 - 180.0

    def _fetch_text(self, url: str) -> str:
        """Return the body of *url*, or ``''`` if it is missing, empty or unreachable."""
        try:
            response = requests.get(url, timeout=self.timeout)
        except requests.RequestException as e:
            print(f"❌ Error fetching {url}: {str(e)}")
            return ''
        if response.status_code == 200 and response.text.strip():
            return response.text
        return ''

    @staticmethod
    def _deduplicate(records: List[Dict]) -> List[Dict]:
        """Drop repeated records while keeping first-seen order.

        The key covers both site-level fields (hourly file) and area-level
        fields (reporting-area file); keying on site fields alone would make
        every reporting-area record for a pollutant look identical.
        """
        identity_fields = (
            'AQSID', 'SiteName', 'ReportingArea', 'StateCode',
            'ParameterName', 'DataType', 'DateObserved',
        )
        seen = set()
        unique = []
        for record in records:
            key = tuple(record.get(name, '') for name in identity_fields)
            if key not in seen:
                seen.add(key)
                unique.append(record)
        return unique

    # ------------------------------------------------------------------
    # Download
    # ------------------------------------------------------------------

    def get_current_hourly_data(self) -> List[Dict]:
        """Return records from the most recent available hourly data file.

        Tries the current GMT hour and then up to five earlier hours. For
        each hour the ``today`` directory is tried first, then the dated
        archive directory. The first file that parses to at least one record
        wins; an empty list is returned when nothing could be retrieved.
        """
        # File names are stamped in GMT.
        gmt_now = datetime.now(pytz.UTC)

        for hour_offset in range(0, 6):  # current hour plus the past 5 hours
            target_time = gmt_now - timedelta(hours=hour_offset)
            filename = f"HourlyData_{target_time.strftime('%Y%m%d%H')}.dat"
            date_dir = target_time.strftime('%Y/%Y%m%d')
            candidates = (
                f"{self.base_url}/airnow/today/{filename}",
                f"{self.base_url}/airnow/{date_dir}/{filename}",
            )

            for url in candidates:
                print(f"🔍 Trying: {url}")
                text = self._fetch_text(url)
                if not text:
                    continue
                print(f"✅ SUCCESS: Found data file with {len(text.splitlines())} lines")
                records = self.parse_hourly_data_file(text)
                if records:
                    print(f"📊 Parsed {len(records)} station records from {url}")
                    return records

            time.sleep(0.1)  # Be nice to the server

        return []

    def get_reporting_areas_data(self) -> List[Dict]:
        """Return records parsed from today's ``reportingarea.dat`` (or ``[]``)."""
        url = f"{self.base_url}/airnow/today/reportingarea.dat"
        print(f"🔍 Trying reporting areas: {url}")

        text = self._fetch_text(url)
        if not text:
            return []

        print("✅ Found reporting areas file")
        return self.parse_reporting_areas_file(text)

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def parse_hourly_data_file(self, text: str) -> List[Dict]:
        """Parse the pipe-delimited hourly data format.

        Expected field order: valid date, valid time, AQS ID, site name,
        GMT offset, parameter name, reporting units, value, data source.
        Lines with fewer than nine fields are skipped. An unparseable value
        or GMT offset is stored as 0 rather than discarding the record.

        The hourly file carries no coordinates, so ``Latitude`` is a fixed
        default and ``Longitude`` is only a coarse estimate derived from the
        GMT offset; join against a site-location file for real positions.
        """
        lines = text.strip().splitlines()  # splitlines() also handles CRLF
        data = []

        print(f"🔧 Parsing {len(lines)} lines...")

        for line in lines:
            if not line.strip():
                continue

            fields = line.split('|')
            if len(fields) < 9:  # Minimum required fields
                continue

            gmt_offset = self._parse_int(fields[4])
            value = self._parse_float(fields[7])
            if value is None:
                value = 0

            record = {
                'DateObserved': fields[0],
                'HourObserved': fields[1],
                'AQSID': fields[2],
                'SiteName': fields[3],
                'GMTOffset': gmt_offset if gmt_offset is not None else 0,
                'ParameterName': fields[5],
                'ReportingUnits': fields[6],
                'Value': value,
                'DataSource': fields[8],
                'source': 'hourly_bulk_file',
                'Latitude': self.DEFAULT_LATITUDE,
                'Longitude': self._estimate_longitude(gmt_offset),
            }
            # AQI is only defined for the pollutants in AQI_TIERS; others get 0.
            record['AQI'] = self.calculate_aqi(record['ParameterName'], record['Value'])
            data.append(record)

        print(f"✅ Successfully parsed {len(data)} records")
        return data

    def parse_reporting_areas_file(self, text: str) -> List[Dict]:
        """Parse the pipe-delimited ``reportingarea.dat`` format.

        Field order used (per the AirNow ReportingArea fact sheet): issue
        date, valid date, valid time, time zone, record sequence, data type,
        primary flag, reporting area, state code, latitude, longitude,
        pollutant, AQI value, AQI category, ...

        Lines that are too short or whose coordinates are not numeric are
        skipped. Empty coordinates become 0.0 and a missing or non-numeric
        AQI becomes 0.
        """
        data = []

        for line in text.strip().splitlines():
            if not line.strip():
                continue

            fields = line.split('|')
            if len(fields) < 13:  # need everything up to the AQI value
                continue

            try:
                latitude = float(fields[9]) if fields[9].strip() else 0.0
                longitude = float(fields[10]) if fields[10].strip() else 0.0
            except ValueError:
                continue

            aqi = self._parse_int(fields[12])

            data.append({
                'DateObserved': fields[1],
                'HourObserved': fields[2],
                'DataType': fields[5],
                'ReportingArea': fields[7],
                'StateCode': fields[8],
                'Latitude': latitude,
                'Longitude': longitude,
                'ParameterName': fields[11],
                'AQI': aqi if aqi is not None and aqi >= 0 else 0,
                'CategoryName': fields[13] if len(fields) > 13 else '',
                'source': 'reporting_areas_file',
            })

        return data

    # ------------------------------------------------------------------
    # AQI
    # ------------------------------------------------------------------

    def calculate_aqi(self, parameter: str, value: float) -> int:
        """Return an approximate AQI for *value* of *parameter*.

        Supported parameters are ``'OZONE'``, ``'PM2.5'`` and ``'PM10'``;
        anything else, and any non-positive or NaN value, yields 0. The
        index is interpolated linearly inside the matching tier and
        truncated to an int; concentrations above the last breakpoint are
        capped at the top of the scale.

        NOTE(review): the input is a single hourly value, not the averaged
        concentration the official index is defined on - treat as indicative.
        """
        tiers = self.AQI_TIERS.get(parameter)
        if tiers is None or not value > 0:  # "not >" also rejects NaN
            return 0

        for upper, lower, span, aqi_floor, aqi_span in tiers:
            if value <= upper:
                return int(aqi_floor + (aqi_span / span) * (value - lower))

        # Beyond the last breakpoint: report the top of the scale exactly.
        _, _, _, aqi_floor, aqi_span = tiers[-1]
        return aqi_floor + aqi_span

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def get_all_stations(self) -> List[Dict]:
        """Fetch hourly and reporting-area data and return the deduplicated union."""
        print("🚀 Fetching AirNow bulk station data...")

        all_data = []

        print("\n📊 Getting hourly monitoring data...")
        all_data.extend(self.get_current_hourly_data())

        print("\n🌍 Getting reporting areas data...")
        all_data.extend(self.get_reporting_areas_data())

        print(f"\n🔧 Deduplicating {len(all_data)} records...")
        unique_data = self._deduplicate(all_data)

        print(f"✅ Final result: {len(unique_data)} unique monitoring stations")
        return unique_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
def main() -> None:
    """Fetch all available AirNow records, print a summary and save them to CSV.

    The summary only touches columns that are actually present: records from
    the reporting-area file carry no ``SiteName``/``Value``/``DataSource``,
    so a run where only that file is available must not fail on them.
    """
    fetcher = AirNowBulkFetcher()

    # Get all stations data
    stations_data = fetcher.get_all_stations()

    if not stations_data:
        print("❌ No data retrieved. Check the AirNow file servers.")
        return

    # Convert to DataFrame for easy viewing
    df = pd.DataFrame(stations_data)
    print(f"\n📈 Data Summary:")
    print(f"Total stations: {len(df)}")

    for label, column in (
        ("Parameters monitored", "ParameterName"),
        ("Unique sites", "SiteName"),
    ):
        if column in df.columns:
            print(f"{label}: {df[column].nunique()}")

    if "ParameterName" in df.columns:
        print(f"\nParameter breakdown:")
        print(df["ParameterName"].value_counts().head(10))

    # Show only the sample columns that exist in this result set.
    sample_columns = [
        column
        for column in ("SiteName", "ParameterName", "Value", "AQI", "DataSource")
        if column in df.columns
    ]
    if sample_columns:
        print(f"\nSample records:")
        print(df[sample_columns].head(10))

    # Save to CSV
    df.to_csv('airnow_stations_data.csv', index=False)
    print(f"\n💾 Data saved to 'airnow_stations_data.csv'")


# Usage example
if __name__ == "__main__":
    main()