import gradio as gr
import requests
import folium
import json
import time
import os
from typing import Dict, List, Optional, Tuple
import pandas as pd


class AirQualityMapper:
    """Class to handle AirNow API interactions and map generation"""

    def __init__(self):
        # Root URL for every AirNow endpoint used below.
        self.base_url = "https://www.airnowapi.org"
        # Marker colors per AQI category (EPA AQI color scale).
        self.aqi_colors = {
            "Good": "#00E400",
            "Moderate": "#FFFF00",
            "Unhealthy for Sensitive Groups": "#FF7E00",
            "Unhealthy": "#FF0000",
            "Very Unhealthy": "#8F3F97",
            "Hazardous": "#7E0023"
        }
        # Inclusive (min, max) AQI value ranges mapped to category names.
        self.aqi_ranges = {
            (0, 50): "Good",
            (51, 100): "Moderate",
            (101, 150): "Unhealthy for Sensitive Groups",
            (151, 200): "Unhealthy",
            (201, 300): "Very Unhealthy",
            (301, 500): "Hazardous"
        }

    def get_aqi_category(self, aqi_value: int) -> str:
        """Get AQI category based on value.

        Returns "Unknown" for values outside 0-500 (e.g. negative sentinel
        values sometimes reported by the API).
        """
        for (min_val, max_val), category in self.aqi_ranges.items():
            if min_val <= aqi_value <= max_val:
                return category
        return "Unknown"

    def get_aqi_color(self, category: str) -> str:
        """Get color for AQI category; gray (#808080) for unknown categories."""
        return self.aqi_colors.get(category, "#808080")

    def _bounding_box_grid(self) -> List[Dict]:
        """Return the systematic bounding-box grid used to sweep the US.

        Coarse regional boxes cover the continental US, Alaska and Hawaii;
        smaller refinement boxes add density over major metropolitan areas.
        """
        return [
            # Continental US - systematic grid coverage
            # West Coast
            {"minLat": 32.0, "maxLat": 42.0, "minLon": -125.0, "maxLon": -115.0},  # CA, OR, WA coast
            {"minLat": 42.0, "maxLat": 49.0, "minLon": -125.0, "maxLon": -115.0},  # WA, OR north
            {"minLat": 32.0, "maxLat": 42.0, "minLon": -115.0, "maxLon": -105.0},  # Interior West
            {"minLat": 42.0, "maxLat": 49.0, "minLon": -115.0, "maxLon": -105.0},  # Mountain North
            # Mountain States
            {"minLat": 32.0, "maxLat": 42.0, "minLon": -105.0, "maxLon": -95.0},   # CO, NM, parts of TX
            {"minLat": 42.0, "maxLat": 49.0, "minLon": -105.0, "maxLon": -95.0},   # MT, ND, SD north
            # Central US
            {"minLat": 25.0, "maxLat": 35.0, "minLon": -105.0, "maxLon": -95.0},   # TX, southern states
            {"minLat": 35.0, "maxLat": 42.0, "minLon": -95.0, "maxLon": -85.0},    # Central plains
            {"minLat": 42.0, "maxLat": 49.0, "minLon": -95.0, "maxLon": -85.0},    # Upper Midwest
            # Eastern US
            {"minLat": 25.0, "maxLat": 35.0, "minLon": -95.0, "maxLon": -85.0},    # Southern states
            {"minLat": 35.0, "maxLat": 42.0, "minLon": -85.0, "maxLon": -75.0},    # Mid-Atlantic
            {"minLat": 42.0, "maxLat": 49.0, "minLon": -85.0, "maxLon": -75.0},    # Great Lakes
            # East Coast
            {"minLat": 25.0, "maxLat": 35.0, "minLon": -85.0, "maxLon": -75.0},    # FL, GA, SC, NC
            {"minLat": 35.0, "maxLat": 42.0, "minLon": -75.0, "maxLon": -65.0},    # Mid-Atlantic coast
            {"minLat": 42.0, "maxLat": 49.0, "minLon": -75.0, "maxLon": -65.0},    # New England
            # Alaska - systematic coverage
            {"minLat": 55.0, "maxLat": 65.0, "minLon": -170.0, "maxLon": -150.0},  # Western Alaska
            {"minLat": 65.0, "maxLat": 72.0, "minLon": -170.0, "maxLon": -150.0},  # Northern Alaska
            {"minLat": 55.0, "maxLat": 65.0, "minLon": -150.0, "maxLon": -130.0},  # Central Alaska
            {"minLat": 65.0, "maxLat": 72.0, "minLon": -150.0, "maxLon": -130.0},  # North Central Alaska
            # Hawaii
            {"minLat": 18.0, "maxLat": 23.0, "minLon": -162.0, "maxLon": -154.0},  # Hawaiian Islands
            # High-density urban refinement boxes (smaller areas for dense coverage)
            {"minLat": 33.5, "maxLat": 34.5, "minLon": -118.8, "maxLon": -117.8},  # Los Angeles
            {"minLat": 37.3, "maxLat": 38.0, "minLon": -122.8, "maxLon": -122.0},  # San Francisco Bay
            {"minLat": 40.4, "maxLat": 41.0, "minLon": -74.5, "maxLon": -73.5},    # NYC Metro
            {"minLat": 41.6, "maxLat": 42.2, "minLon": -88.0, "maxLon": -87.0},    # Chicago
            {"minLat": 29.5, "maxLat": 30.2, "minLon": -95.8, "maxLon": -95.0},    # Houston
            {"minLat": 32.5, "maxLat": 33.2, "minLon": -97.5, "maxLon": -96.5},    # Dallas-Fort Worth
            {"minLat": 25.5, "maxLat": 26.2, "minLon": -80.8, "maxLon": -80.0},    # Miami
            {"minLat": 33.6, "maxLat": 34.0, "minLon": -84.8, "maxLon": -84.0},    # Atlanta
            {"minLat": 39.7, "maxLat": 40.2, "minLon": -75.5, "maxLon": -74.8},    # Philadelphia
            {"minLat": 42.2, "maxLat": 42.6, "minLon": -71.3, "maxLon": -70.8},    # Boston
            {"minLat": 47.4, "maxLat": 47.8, "minLon": -122.5, "maxLon": -122.0},  # Seattle
            {"minLat": 38.7, "maxLat": 39.1, "minLon": -77.3, "maxLon": -76.8},    # Washington DC
            {"minLat": 39.1, "maxLat": 39.4, "minLon": -76.8, "maxLon": -76.3},    # Baltimore
            {"minLat": 42.2, "maxLat": 42.5, "minLon": -83.3, "maxLon": -82.8},    # Detroit
            {"minLat": 44.7, "maxLat": 45.2, "minLon": -93.5, "maxLon": -93.0},    # Minneapolis
            {"minLat": 29.9, "maxLat": 30.4, "minLon": -90.3, "maxLon": -89.8},    # New Orleans
            {"minLat": 36.0, "maxLat": 36.4, "minLon": -86.0, "maxLon": -85.5},    # Nashville
            {"minLat": 35.1, "maxLat": 35.4, "minLon": -81.0, "maxLon": -80.5},    # Charlotte
            {"minLat": 39.0, "maxLat": 39.4, "minLon": -84.8, "maxLon": -84.3},    # Cincinnati
            {"minLat": 41.3, "maxLat": 41.7, "minLon": -81.9, "maxLon": -81.4},    # Cleveland
            {"minLat": 40.3, "maxLat": 40.7, "minLon": -80.2, "maxLon": -79.7},    # Pittsburgh
        ]

    def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
        """
        Fetch ALL air quality monitoring stations using the Monitoring Sites
        endpoint with systematic bounding box coverage; falls back to a
        ZIP-code sweep when the box queries return too little data.

        Args:
            api_key: AirNow API key (see docs.airnowapi.org).

        Returns:
            (data_list, status_message) — deduplicated station records and a
            user-facing status string. On failure, data_list is empty and the
            message explains why.
        """
        if not api_key or api_key.strip() == "":
            return [], "❌ Please enter a valid AirNow API key"
        print(f"Using API key: {api_key[:8]}..." if len(api_key) > 8 else "API key too short")
        try:
            all_data = []
            successful_requests = 0
            # STRATEGY FROM RESEARCH: Use Monitoring Sites endpoint with
            # bounding box queries — bypasses reporting-area aggregation.
            print("Using Monitoring Sites endpoint with systematic bounding box coverage...")
            bounding_boxes = self._bounding_box_grid()
            for i, bbox in enumerate(bounding_boxes):
                try:
                    # NOTE(review): endpoint path taken from research notes —
                    # confirm against the current AirNow API documentation.
                    url = f"{self.base_url}/aq/data/monitoringSite/"
                    params = {
                        "format": "application/json",
                        "API_KEY": api_key,
                        "minLat": bbox["minLat"],
                        "maxLat": bbox["maxLat"],
                        "minLon": bbox["minLon"],
                        "maxLon": bbox["maxLon"]
                    }
                    print(f"Querying bounding box {i+1}/{len(bounding_boxes)}: {bbox}")
                    response = requests.get(url, params=params, timeout=20)
                    if response.status_code == 200:
                        data = response.json()
                        if data:
                            print(f"Found {len(data)} monitoring sites in box {i+1}")
                            # Tag each record with its source box for debugging.
                            for site in data:
                                site['source_bbox'] = f"Box_{i+1}"
                            all_data.extend(data)
                            successful_requests += 1
                        else:
                            print(f"No data in box {i+1}")
                    else:
                        print(f"Error {response.status_code} for box {i+1}: {response.text[:100]}")
                    # 500 requests/hour limit — pace accordingly.
                    time.sleep(0.1)
                except requests.exceptions.RequestException as e:
                    print(f"Request failed for box {i+1}: {str(e)}")
                    continue
            print(f"Monitoring Sites endpoint: {len(all_data)} total records from {successful_requests} successful requests")
            # Fallback: if the sites endpoint yielded too little, sweep current
            # observations for the highest-priority ZIP codes instead.
            if len(all_data) < 100:
                print("Falling back to comprehensive ZIP code strategy...")
                zip_codes = self.generate_comprehensive_zip_list()
                for i, zipcode in enumerate(zip_codes[:1000]):  # first 1000 most important
                    try:
                        url = f"{self.base_url}/aq/observation/zipCode/current/"
                        params = {
                            "format": "application/json",
                            "zipCode": zipcode,
                            "distance": 150,  # maximum radius for coverage
                            "API_KEY": api_key
                        }
                        response = requests.get(url, params=params, timeout=15)
                        if response.status_code == 200:
                            data = response.json()
                            if data:
                                for observation in data:
                                    observation['source_zipcode'] = zipcode
                                all_data.extend(data)
                                successful_requests += 1
                        time.sleep(0.05)
                        if i % 100 == 0:
                            print(f"Processed {i+1}/{len(zip_codes[:1000])} ZIP codes, found {len(all_data)} stations")
                    except Exception:
                        # Best-effort sweep: skip any ZIP whose request or JSON
                        # decode fails (was a bare `except:`, which also
                        # swallowed KeyboardInterrupt/SystemExit).
                        continue
            print(f"Total data collected: {len(all_data)} records")
            if not all_data:
                return [], f"⚠️ No monitoring stations found. Please check your API key."
            # Advanced deduplication — preserve maximum unique stations.
            unique_data = self.advanced_deduplication(all_data)
            print(f"After advanced deduplication: {len(unique_data)} unique monitoring stations")
            return unique_data, f"✅ Successfully loaded {len(unique_data)} monitoring stations from {successful_requests} API calls using systematic bounding box coverage"
        except Exception as e:
            print(f"General error: {str(e)}")
            return [], f"❌ Error fetching data: {str(e)}"

    def generate_comprehensive_zip_list(self) -> List[str]:
        """Generate a de-duplicated ZIP code list covering US population centers.

        Curated high-priority ZIPs come first, followed by a systematic
        two-digit-prefix grid; duplicates are removed (order-preserving) so
        no ZIP is queried twice against the rate-limited API.
        """
        # Major metropolitan statistical areas + comprehensive coverage
        zip_codes = [
            # Top 100 metropolitan areas by population
            "90210", "90024", "91101", "91201", "90001", "90002", "90003",
            "10001", "10002", "10003", "10019", "10021", "10022", "10023", "10024",
            "60601", "60602", "60603", "60604", "60605", "60606", "60607", "60608",
            "75201", "75202", "75203", "75204", "75205", "75206", "75207", "75208",
            "33101", "33102", "33109", "33124", "33125", "33126", "33127", "33128",
            "77001", "77002", "77003", "77004", "77005", "77006", "77007", "77008",
            "30301", "30302", "30303", "30309", "30318", "30324", "30326", "30327",
            "19101", "19102", "19103", "19104", "19106", "19107", "19123", "19146",
            "85001", "85003", "85004", "85006", "85007", "85008", "85009", "85013",
            "28201", "28202", "28203", "28204", "28205", "28206", "28207", "28208",
            # State capitals and major cities from less-covered states
            "99501", "99502", "99503", "99504", "99507", "99508", "99515", "99577",  # Alaska
            "96801", "96802", "96813", "96814", "96815", "96816", "96817", "96818",  # Hawaii
            "83701", "83702", "83703", "83704", "83705", "83706", "83709", "83712",  # Idaho
            "59601", "59602", "59718", "59724", "59801", "59802", "59803", "59808",  # Montana
            "82001", "82009", "82601", "82602", "82604", "82605", "82609", "82633",  # Wyoming
            "58501", "58502", "58503", "58504", "58701", "58702", "58703", "58704",  # North Dakota
            "57501", "57701", "57702", "57703", "57104", "57105", "57106", "57197",  # South Dakota
        ]
        # Systematic grid: every 50th "area" suffix under each 2-digit prefix.
        # f"{state_code:02d}{area_code:03d}" is always exactly 5 characters,
        # so the original length check was dead code.
        additional_zips = [
            f"{state_code:02d}{area_code:03d}"
            for state_code in range(1, 100)
            for area_code in range(1, 1000, 50)
        ]
        # De-duplicate while preserving priority order: the curated list
        # repeated "90210" and overlapped the generated grid (e.g. "10001"),
        # and each duplicate would cost a redundant API call.
        return list(dict.fromkeys(zip_codes + additional_zips[:500]))

    def advanced_deduplication(self, data: List[Dict]) -> List[Dict]:
        """Advanced deduplication preserving maximum unique stations.

        Keeps the first occurrence of each station, where identity is the
        precise coordinates plus parameter/agency/site/monitor-type — so
        co-located monitors run by different agencies are NOT collapsed.
        """
        seen_stations = set()
        unique_data = []
        for item in data:
            # Highly specific key to avoid over-deduplication.
            station_key = (
                round(item.get('Latitude', 0), 6),   # very precise location
                round(item.get('Longitude', 0), 6),
                item.get('ParameterName', ''),
                item.get('AgencyName', ''),          # agencies may co-locate monitors
                item.get('SiteName', ''),            # site-specific identification
                item.get('MonitorType', '')          # different monitor types
            )
            if station_key not in seen_stations:
                seen_stations.add(station_key)
                unique_data.append(item)
        return unique_data

    def create_map(self, data: List[Dict]) -> str:
        """Create an interactive map with air quality data"""
        if not data:
            # Create a basic US map if no data
            pass  # NOTE(review): method body continues past this chunk of the file
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4) folium.Marker( [39.8283, -98.5795], popup="No data available. Please check your API key.", icon=folium.Icon(color='red', icon='info-sign') ).add_to(m) return m._repr_html_() # Calculate center point of all data lats = [item['Latitude'] for item in data if 'Latitude' in item] lons = [item['Longitude'] for item in data if 'Longitude' in item] if lats and lons: center_lat = sum(lats) / len(lats) center_lon = sum(lons) / len(lons) else: center_lat, center_lon = 39.8283, -98.5795 # Center of US # Create map m = folium.Map(location=[center_lat, center_lon], zoom_start=4) # Add markers for each monitoring location for item in data: try: lat = item.get('Latitude') lon = item.get('Longitude') aqi = item.get('AQI', 0) parameter = item.get('ParameterName', 'Unknown') area = item.get('ReportingArea', 'Unknown Area') state = item.get('StateCode', 'Unknown') category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi)) if lat is None or lon is None: continue # Get color based on AQI category color = self.get_aqi_color(category) # Create popup content popup_content = f"""
AQI: {aqi} ({category})
Parameter: {parameter}
Location: {lat:.3f}, {lon:.3f}
Last Updated: {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00
Good (0-50)
Moderate (51-100)
Unhealthy for Sensitive (101-150)
Unhealthy (151-200)
Very Unhealthy (201-300)
Hazardous (301+)