import gradio as gr
import requests
import folium
import json
import time
import os
from typing import Dict, List, Optional, Tuple
import pandas as pd
class AirQualityMapper:
"""Class to handle AirNow API interactions and map generation"""
def __init__(self):
self.base_url = "https://www.airnowapi.org"
self.aqi_colors = {
"Good": "#00E400",
"Moderate": "#FFFF00",
"Unhealthy for Sensitive Groups": "#FF7E00",
"Unhealthy": "#FF0000",
"Very Unhealthy": "#8F3F97",
"Hazardous": "#7E0023"
}
self.aqi_ranges = {
(0, 50): "Good",
(51, 100): "Moderate",
(101, 150): "Unhealthy for Sensitive Groups",
(151, 200): "Unhealthy",
(201, 300): "Very Unhealthy",
(301, 500): "Hazardous"
}
def get_aqi_category(self, aqi_value: int) -> str:
"""Get AQI category based on value"""
for (min_val, max_val), category in self.aqi_ranges.items():
if min_val <= aqi_value <= max_val:
return category
return "Unknown"
def get_aqi_color(self, category: str) -> str:
"""Get color for AQI category"""
return self.aqi_colors.get(category, "#808080")
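    # Illustrative example of the two helpers above: an AQI of 135 falls in the
    # 101-150 range, so get_aqi_category(135) returns "Unhealthy for Sensitive Groups"
    # and get_aqi_color("Unhealthy for Sensitive Groups") returns "#FF7E00".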
def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
"""
Fetch ALL air quality monitoring stations using the Monitoring Sites endpoint
with systematic bounding box coverage as identified in research
Returns: (data_list, status_message)
"""
if not api_key or api_key.strip() == "":
return [], "❌ Please enter a valid AirNow API key"
print(f"Using API key: {api_key[:8]}..." if len(api_key) > 8 else "API key too short")
try:
all_data = []
successful_requests = 0
# STRATEGY FROM RESEARCH: Use Monitoring Sites endpoint with bounding box queries
# This bypasses the reporting area aggregation limitation
print("Using Monitoring Sites endpoint with systematic bounding box coverage...")
# Create systematic bounding box grid covering entire continental US + Alaska + Hawaii
# Based on research: H3 hexagonal grid with adaptive spacing
bounding_boxes = [
# Continental US - systematic grid coverage
# West Coast
{"minLat": 32.0, "maxLat": 42.0, "minLon": -125.0, "maxLon": -115.0}, # CA, OR, WA coast
{"minLat": 42.0, "maxLat": 49.0, "minLon": -125.0, "maxLon": -115.0}, # WA, OR north
{"minLat": 32.0, "maxLat": 42.0, "minLon": -115.0, "maxLon": -105.0}, # Interior West
{"minLat": 42.0, "maxLat": 49.0, "minLon": -115.0, "maxLon": -105.0}, # Mountain North
# Mountain States
{"minLat": 32.0, "maxLat": 42.0, "minLon": -105.0, "maxLon": -95.0}, # CO, NM, parts of TX
{"minLat": 42.0, "maxLat": 49.0, "minLon": -105.0, "maxLon": -95.0}, # MT, ND, SD north
# Central US
{"minLat": 25.0, "maxLat": 35.0, "minLon": -105.0, "maxLon": -95.0}, # TX, southern states
{"minLat": 35.0, "maxLat": 42.0, "minLon": -95.0, "maxLon": -85.0}, # Central plains
{"minLat": 42.0, "maxLat": 49.0, "minLon": -95.0, "maxLon": -85.0}, # Upper Midwest
# Eastern US
{"minLat": 25.0, "maxLat": 35.0, "minLon": -95.0, "maxLon": -85.0}, # Southern states
{"minLat": 35.0, "maxLat": 42.0, "minLon": -85.0, "maxLon": -75.0}, # Mid-Atlantic
{"minLat": 42.0, "maxLat": 49.0, "minLon": -85.0, "maxLon": -75.0}, # Great Lakes
# East Coast
{"minLat": 25.0, "maxLat": 35.0, "minLon": -85.0, "maxLon": -75.0}, # FL, GA, SC, NC
{"minLat": 35.0, "maxLat": 42.0, "minLon": -75.0, "maxLon": -65.0}, # Mid-Atlantic coast
{"minLat": 42.0, "maxLat": 49.0, "minLon": -75.0, "maxLon": -65.0}, # New England
# Alaska - systematic coverage
{"minLat": 55.0, "maxLat": 65.0, "minLon": -170.0, "maxLon": -150.0}, # Western Alaska
{"minLat": 65.0, "maxLat": 72.0, "minLon": -170.0, "maxLon": -150.0}, # Northern Alaska
{"minLat": 55.0, "maxLat": 65.0, "minLon": -150.0, "maxLon": -130.0}, # Central Alaska
{"minLat": 65.0, "maxLat": 72.0, "minLon": -150.0, "maxLon": -130.0}, # North Central Alaska
# Hawaii
{"minLat": 18.0, "maxLat": 23.0, "minLon": -162.0, "maxLon": -154.0}, # Hawaiian Islands
# High-density urban refinement boxes (smaller areas for dense coverage)
# Major metropolitan areas - research shows these have multiple stations
{"minLat": 33.5, "maxLat": 34.5, "minLon": -118.8, "maxLon": -117.8}, # Los Angeles
{"minLat": 37.3, "maxLat": 38.0, "minLon": -122.8, "maxLon": -122.0}, # San Francisco Bay
{"minLat": 40.4, "maxLat": 41.0, "minLon": -74.5, "maxLon": -73.5}, # NYC Metro
{"minLat": 41.6, "maxLat": 42.2, "minLon": -88.0, "maxLon": -87.0}, # Chicago
{"minLat": 29.5, "maxLat": 30.2, "minLon": -95.8, "maxLon": -95.0}, # Houston
{"minLat": 32.5, "maxLat": 33.2, "minLon": -97.5, "maxLon": -96.5}, # Dallas-Fort Worth
{"minLat": 25.5, "maxLat": 26.2, "minLon": -80.8, "maxLon": -80.0}, # Miami
{"minLat": 33.6, "maxLat": 34.0, "minLon": -84.8, "maxLon": -84.0}, # Atlanta
{"minLat": 39.7, "maxLat": 40.2, "minLon": -75.5, "maxLon": -74.8}, # Philadelphia
{"minLat": 42.2, "maxLat": 42.6, "minLon": -71.3, "maxLon": -70.8}, # Boston
{"minLat": 47.4, "maxLat": 47.8, "minLon": -122.5, "maxLon": -122.0}, # Seattle
{"minLat": 38.7, "maxLat": 39.1, "minLon": -77.3, "maxLon": -76.8}, # Washington DC
{"minLat": 39.1, "maxLat": 39.4, "minLon": -76.8, "maxLon": -76.3}, # Baltimore
{"minLat": 42.2, "maxLat": 42.5, "minLon": -83.3, "maxLon": -82.8}, # Detroit
{"minLat": 44.7, "maxLat": 45.2, "minLon": -93.5, "maxLon": -93.0}, # Minneapolis
{"minLat": 29.9, "maxLat": 30.4, "minLon": -90.3, "maxLon": -89.8}, # New Orleans
{"minLat": 36.0, "maxLat": 36.4, "minLon": -86.0, "maxLon": -85.5}, # Nashville
{"minLat": 35.1, "maxLat": 35.4, "minLon": -81.0, "maxLon": -80.5}, # Charlotte
{"minLat": 39.0, "maxLat": 39.4, "minLon": -84.8, "maxLon": -84.3}, # Cincinnati
{"minLat": 41.3, "maxLat": 41.7, "minLon": -81.9, "maxLon": -81.4}, # Cleveland
{"minLat": 40.3, "maxLat": 40.7, "minLon": -80.2, "maxLon": -79.7}, # Pittsburgh
]
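            # The larger boxes above roughly tile the continental US (about 25-49N, 65-125W)
            # plus Alaska and Hawaii; the smaller metropolitan boxes add denser coverage
            # where monitoring stations tend to cluster.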
# Use the Monitoring Sites endpoint as identified in research
for i, bbox in enumerate(bounding_boxes):
try:
# Research finding: Use monitoring sites endpoint with bounding box
url = f"{self.base_url}/aq/data/monitoringSite/"
params = {
"format": "application/json",
"API_KEY": api_key,
"minLat": bbox["minLat"],
"maxLat": bbox["maxLat"],
"minLon": bbox["minLon"],
"maxLon": bbox["maxLon"]
}
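                    # Note (assumption): if this monitoringSite path is rejected for a given
                    # key, the publicly documented query at docs.airnowapi.org uses /aq/data/
                    # with a single BBOX=minLon,minLat,maxLon,maxLat parameter rather than the
                    # four min/max fields used here; the ZIP-code fallback below also covers
                    # that case.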
print(f"Querying bounding box {i+1}/{len(bounding_boxes)}: {bbox}")
response = requests.get(url, params=params, timeout=20)
if response.status_code == 200:
data = response.json()
if data:
print(f"Found {len(data)} monitoring sites in box {i+1}")
for site in data:
site['source_bbox'] = f"Box_{i+1}"
all_data.extend(data)
successful_requests += 1
else:
print(f"No data in box {i+1}")
else:
print(f"Error {response.status_code} for box {i+1}: {response.text[:100]}")
                    # AirNow allows about 500 requests per hour; this small set of box
                    # queries stays well within that, so only a short delay is needed
                    time.sleep(0.1)
except requests.exceptions.RequestException as e:
print(f"Request failed for box {i+1}: {str(e)}")
continue
print(f"Monitoring Sites endpoint: {len(all_data)} total records from {successful_requests} successful requests")
# If monitoring sites endpoint didn't work, fall back to current observations with ALL zip codes
if len(all_data) < 100:
print("Falling back to comprehensive ZIP code strategy...")
# Research insight: Cover ALL major population centers systematically
# Generate comprehensive ZIP code list covering entire US population
zip_codes = self.generate_comprehensive_zip_list()
                for i, zipcode in enumerate(zip_codes[:1000]):  # cap the fallback at 1,000 ZIP code queries
try:
url = f"{self.base_url}/aq/observation/zipCode/current/"
                        params = {
                            "format": "application/json",
                            "zipCode": zipcode,
                            "distance": 150,  # search radius in miles around the ZIP code
"API_KEY": api_key
}
response = requests.get(url, params=params, timeout=15)
if response.status_code == 200:
data = response.json()
if data:
for observation in data:
observation['source_zipcode'] = zipcode
all_data.extend(data)
successful_requests += 1
time.sleep(0.05) # Very fast processing
if i % 100 == 0:
print(f"Processed {i+1}/{len(zip_codes[:1000])} ZIP codes, found {len(all_data)} stations")
                except Exception:
                    # Skip this ZIP code if the request fails or the response can't be parsed
                    continue
print(f"Total data collected: {len(all_data)} records")
if not all_data:
                return [], "⚠️ No monitoring stations found. Please check your API key."
# Advanced deduplication from research - preserve maximum unique stations
unique_data = self.advanced_deduplication(all_data)
print(f"After advanced deduplication: {len(unique_data)} unique monitoring stations")
return unique_data, f"✅ Successfully loaded {len(unique_data)} monitoring stations from {successful_requests} API calls using systematic bounding box coverage"
except Exception as e:
print(f"General error: {str(e)}")
return [], f"❌ Error fetching data: {str(e)}"
    def generate_comprehensive_zip_list(self) -> List[str]:
        """Generate a broad ZIP code list covering major US population centers"""
        # Major metropolitan areas plus systematic statewide coverage
        zip_codes = [
            # ZIP codes from some of the largest metropolitan areas
            "90210", "90024", "91101", "91201", "90001", "90002", "90003",
"10001", "10002", "10003", "10019", "10021", "10022", "10023", "10024",
"60601", "60602", "60603", "60604", "60605", "60606", "60607", "60608",
"75201", "75202", "75203", "75204", "75205", "75206", "75207", "75208",
"33101", "33102", "33109", "33124", "33125", "33126", "33127", "33128",
"77001", "77002", "77003", "77004", "77005", "77006", "77007", "77008",
"30301", "30302", "30303", "30309", "30318", "30324", "30326", "30327",
"19101", "19102", "19103", "19104", "19106", "19107", "19123", "19146",
"85001", "85003", "85004", "85006", "85007", "85008", "85009", "85013",
"28201", "28202", "28203", "28204", "28205", "28206", "28207", "28208",
            # State capitals and major cities in states not covered above
"99501", "99502", "99503", "99504", "99507", "99508", "99515", "99577", # Alaska
"96801", "96802", "96813", "96814", "96815", "96816", "96817", "96818", # Hawaii
"83701", "83702", "83703", "83704", "83705", "83706", "83709", "83712", # Idaho
"59601", "59602", "59718", "59724", "59801", "59802", "59803", "59808", # Montana
"82001", "82009", "82601", "82602", "82604", "82605", "82609", "82633", # Wyoming
"58501", "58502", "58503", "58504", "58701", "58702", "58703", "58704", # North Dakota
"57501", "57701", "57702", "57703", "57104", "57105", "57106", "57197", # South Dakota
# Add systematic coverage for remaining areas
]
        # Add a coarse systematic grid of additional candidate ZIP codes.
        # The format below always yields five digits (e.g. "01001", "01051", ...);
        # codes that do not exist simply return no data from the API.
        additional_zips = []
        for prefix in range(1, 100):           # two-digit ZIP prefix
            for suffix in range(1, 1000, 50):  # every 50th three-digit suffix
                additional_zips.append(f"{prefix:02d}{suffix:03d}")
        return zip_codes + additional_zips[:500]  # priority ZIPs first, then the systematic grid
    def advanced_deduplication(self, data: List[Dict]) -> List[Dict]:
        """Deduplicate records while preserving co-located monitors from different agencies and parameters"""
seen_stations = set()
unique_data = []
for item in data:
# Create highly specific key to avoid over-deduplication
station_key = (
round(item.get('Latitude', 0), 6), # Very precise location
round(item.get('Longitude', 0), 6),
item.get('ParameterName', ''),
item.get('AgencyName', ''), # Different agencies may have co-located monitors
item.get('SiteName', ''), # Site-specific identification
item.get('MonitorType', '') # Different monitor types
)
if station_key not in seen_stations:
seen_stations.add(station_key)
unique_data.append(item)
return unique_data
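    # Illustrative example: two records at identical coordinates but reported by
    # different agencies (e.g. a state agency and the EPA) yield distinct keys above,
    # so both are kept as separate stations rather than collapsed into one.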
def create_map(self, data: List[Dict]) -> str:
"""Create an interactive map with air quality data"""
if not data:
# Create a basic US map if no data
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
folium.Marker(
[39.8283, -98.5795],
popup="No data available. Please check your API key.",
icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)
return m._repr_html_()
# Calculate center point of all data
lats = [item['Latitude'] for item in data if 'Latitude' in item]
lons = [item['Longitude'] for item in data if 'Longitude' in item]
if lats and lons:
center_lat = sum(lats) / len(lats)
center_lon = sum(lons) / len(lons)
else:
center_lat, center_lon = 39.8283, -98.5795 # Center of US
# Create map
m = folium.Map(location=[center_lat, center_lon], zoom_start=4)
# Add markers for each monitoring location
for item in data:
try:
lat = item.get('Latitude')
lon = item.get('Longitude')
aqi = item.get('AQI', 0)
parameter = item.get('ParameterName', 'Unknown')
area = item.get('ReportingArea', 'Unknown Area')
state = item.get('StateCode', 'Unknown')
category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi))
if lat is None or lon is None:
continue
# Get color based on AQI category
color = self.get_aqi_color(category)
# Create popup content
popup_content = f"""
<div style="width: 200px;">
<h4>{area}, {state}</h4>
<p><b>AQI:</b> {aqi} ({category})</p>
<p><b>Parameter:</b> {parameter}</p>
<p><b>Location:</b> {lat:.3f}, {lon:.3f}</p>
<p><b>Last Updated:</b> {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00</p>
</div>
"""
                # Map the AQI value to one of folium's supported marker colors
                # (folium markers have no 'yellow', so 'beige' stands in for the Moderate band)
                if aqi <= 50:
                    marker_color = 'green'
                elif aqi <= 100:
                    marker_color = 'beige'
                elif aqi <= 150:
                    marker_color = 'orange'
                elif aqi <= 200:
                    marker_color = 'red'
                elif aqi <= 300:
                    marker_color = 'purple'
                else:
                    marker_color = 'darkred'
# Add marker
folium.Marker(
[lat, lon],
popup=folium.Popup(popup_content, max_width=250),
tooltip=f"{area}: AQI {aqi}",
icon=folium.Icon(color=marker_color, icon='cloud')
).add_to(m)
            except Exception:
                continue  # Skip markers with missing or malformed fields
# Add legend
legend_html = """
<div style="position: fixed;
bottom: 50px; left: 50px; width: 150px; height: 180px;
background-color: white; border:2px solid grey; z-index:9999;
font-size:14px; padding: 10px">
<h4>AQI Legend</h4>
<p><i class="fa fa-circle" style="color:green"></i> Good (0-50)</p>
        <p><i class="fa fa-circle" style="color:beige"></i> Moderate (51-100)</p>
<p><i class="fa fa-circle" style="color:orange"></i> Unhealthy for Sensitive (101-150)</p>
<p><i class="fa fa-circle" style="color:red"></i> Unhealthy (151-200)</p>
<p><i class="fa fa-circle" style="color:purple"></i> Very Unhealthy (201-300)</p>
<p><i class="fa fa-circle" style="color:darkred"></i> Hazardous (301+)</p>
</div>
"""
m.get_root().html.add_child(folium.Element(legend_html))
return m._repr_html_()
def create_data_table(self, data: List[Dict]) -> pd.DataFrame:
"""Create a data table from the air quality data"""
if not data:
return pd.DataFrame()
# Extract relevant columns
table_data = []
for item in data:
table_data.append({
'Reporting Area': item.get('ReportingArea', 'Unknown'),
'State': item.get('StateCode', 'Unknown'),
'AQI': item.get('AQI', 0),
'Category': item.get('Category', {}).get('Name', self.get_aqi_category(item.get('AQI', 0))),
'Parameter': item.get('ParameterName', 'Unknown'),
'Date': item.get('DateObserved', 'Unknown'),
'Hour': item.get('HourObserved', 'Unknown'),
'Latitude': item.get('Latitude', 'Unknown'),
'Longitude': item.get('Longitude', 'Unknown')
})
df = pd.DataFrame(table_data)
return df.sort_values('AQI', ascending=False)
# Initialize the mapper
mapper = AirQualityMapper()
# Check environment variable on startup
env_api_key = os.getenv('AIRNOW_API_KEY')
print(f"Environment variable AIRNOW_API_KEY: {'SET' if env_api_key else 'NOT SET'}")
if env_api_key:
print(f"API key starts with: {env_api_key[:8]}...")
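# The key can be provided through the environment before launch, for example:
#   export AIRNOW_API_KEY="your-airnow-key"   # illustrative placeholder, not a real key
# or as a secret named AIRNOW_API_KEY in the hosting environment (e.g. a Hugging Face Space).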
def update_map(api_key: str):
"""Update the map with fresh air quality data"""
# Check for environment variable first, then use provided key
env_api_key = os.getenv('AIRNOW_API_KEY')
if env_api_key:
api_key = env_api_key
if not api_key.strip():
return "Please enter your AirNow API key above or set AIRNOW_API_KEY environment variable.", pd.DataFrame(), "❌ No API key provided"
# Fetch data
data, status = mapper.fetch_airnow_data(api_key)
# Create map
map_html = mapper.create_map(data)
# Create data table
df = mapper.create_data_table(data)
return map_html, df, status
# Create Gradio interface
with gr.Blocks(title="AirNow Air Quality Sensor Map", theme=gr.themes.Soft()) as demo:
# Check if API key is set as environment variable
env_api_key = os.getenv('AIRNOW_API_KEY')
api_key_status = "✅ API key loaded from environment variable" if env_api_key else "⚠️ No environment variable set"
gr.Markdown(
f"""
# 🌬️ AirNow Air Quality Sensor Map
**API Key Status**: {api_key_status}
This interactive map displays real-time air quality data from EPA's AirNow network of over 2,000 monitoring stations across the United States.
## How to use:
1. **API Key**: {"API key is already configured via environment variable" if env_api_key else "Enter your API key below or set AIRNOW_API_KEY environment variable"}
        2. **Click "Load Air Quality Data"** to fetch current readings from monitoring stations nationwide
3. **Explore the map**: Click on markers to see detailed information about each monitoring station
## Enhanced Coverage:
        - **Systematic Grid Search**: Queries regional and metropolitan bounding boxes covering the continental US, Alaska, and Hawaii
        - **ZIP-Code Fallback**: Falls back to hundreds of ZIP-code queries (150-mile search radius) if the bounding-box queries return little data
        - **Minimal Deduplication**: Preserves co-located monitors reported by different agencies or for different parameters
        - **Fast Collection**: Short (0.05-0.1 second) delays between requests to stay within the API rate limit
**⚠️ Note**: This data is preliminary and should not be used for regulatory decisions. For official data, visit [EPA's AirData](https://www.epa.gov/outdoor-air-quality-data).
"""
)
with gr.Row():
with gr.Column(scale=3):
api_key_input = gr.Textbox(
label="AirNow API Key (Optional if environment variable is set)",
placeholder="Enter your AirNow API key here..." if not env_api_key else "Using environment variable AIRNOW_API_KEY",
type="password",
info="Get your free API key at docs.airnowapi.org" if not env_api_key else "Environment variable is being used",
value="" if not env_api_key else "Environment variable configured",
interactive=not bool(env_api_key)
)
with gr.Column(scale=1):
load_button = gr.Button("Load Air Quality Data", variant="primary", size="lg")
status_text = gr.Markdown("Click 'Load Air Quality Data' to begin." if env_api_key else "Enter your API key and click 'Load Air Quality Data' to begin.")
with gr.Tabs():
with gr.TabItem("Interactive Map"):
map_output = gr.HTML(label="Air Quality Map")
with gr.TabItem("Data Table"):
data_table = gr.Dataframe(
label="Air Quality Monitoring Stations",
interactive=False
)
gr.Markdown(
"""
## AQI Health Guidelines:
- **Good (0-50)**: Air quality is satisfactory for everyone
- **Moderate (51-100)**: Air quality is acceptable for most people
- **Unhealthy for Sensitive Groups (101-150)**: Members of sensitive groups may experience health effects
- **Unhealthy (151-200)**: Everyone may begin to experience health effects
        - **Very Unhealthy (201-300)**: Health alert - everyone may experience more serious health effects
        - **Hazardous (301+)**: Health warning of emergency conditions - the entire population is more likely to be affected
## Data Sources:
- **AirNow API**: Real-time air quality data from EPA's monitoring network
- **Monitoring Agencies**: 120+ local, state, tribal, and federal government agencies
- **Update Frequency**: Hourly observations, daily forecasts
## Links:
- [AirNow.gov](https://www.airnow.gov) - Official air quality information
- [AirNow API Documentation](https://docs.airnowapi.org/) - API documentation and registration
- [EPA AirData](https://www.epa.gov/outdoor-air-quality-data) - Official regulatory air quality data
"""
)
# Set up event handler
load_button.click(
fn=update_map,
inputs=[api_key_input],
outputs=[map_output, data_table, status_text]
)
# Launch the app
if __name__ == "__main__":
demo.launch()
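    # demo.launch() serves the app locally; Gradio's optional share=True argument can
    # additionally create a temporary public link if remote access is needed.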