import gradio as gr
import requests
import folium
import json
import time
import os
from typing import Dict, List, Optional, Tuple
import pandas as pd
class AirQualityMapper:
"""Class to handle AirNow API interactions and map generation"""
def __init__(self):
self.base_url = "https://www.airnowapi.org"
self.aqi_colors = {
"Good": "#00E400",
"Moderate": "#FFFF00",
"Unhealthy for Sensitive Groups": "#FF7E00",
"Unhealthy": "#FF0000",
"Very Unhealthy": "#8F3F97",
"Hazardous": "#7E0023"
}
self.aqi_ranges = {
(0, 50): "Good",
(51, 100): "Moderate",
(101, 150): "Unhealthy for Sensitive Groups",
(151, 200): "Unhealthy",
(201, 300): "Very Unhealthy",
(301, 500): "Hazardous"
}
def get_aqi_category(self, aqi_value: int) -> str:
"""Get AQI category based on value"""
for (min_val, max_val), category in self.aqi_ranges.items():
if min_val <= aqi_value <= max_val:
return category
return "Unknown"
def get_aqi_color(self, category: str) -> str:
"""Get color for AQI category"""
return self.aqi_colors.get(category, "#808080")
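    # Illustrative example of the two helpers above: an AQI of 135 falls in the
    # 101-150 range, so get_aqi_category(135) returns "Unhealthy for Sensitive Groups"
    # and get_aqi_color("Unhealthy for Sensitive Groups") returns "#FF7E00".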
def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
"""
Fetch ALL air quality monitoring stations using the Monitoring Sites endpoint
with systematic bounding box coverage as identified in research
Returns: (data_list, status_message)
"""
if not api_key or api_key.strip() == "":
return [], "❌ Please enter a valid AirNow API key"
print(f"Using API key: {api_key[:8]}..." if len(api_key) > 8 else "API key too short")
try:
all_data = []
successful_requests = 0
# STRATEGY FROM RESEARCH: Use Monitoring Sites endpoint with bounding box queries
# This bypasses the reporting area aggregation limitation
print("Using Monitoring Sites endpoint with systematic bounding box coverage...")
# Create systematic bounding box grid covering entire continental US + Alaska + Hawaii
# Based on research: H3 hexagonal grid with adaptive spacing
bounding_boxes = [
# Continental US - systematic grid coverage
# West Coast
{"minLat": 32.0, "maxLat": 42.0, "minLon": -125.0, "maxLon": -115.0}, # CA, OR, WA coast
{"minLat": 42.0, "maxLat": 49.0, "minLon": -125.0, "maxLon": -115.0}, # WA, OR north
{"minLat": 32.0, "maxLat": 42.0, "minLon": -115.0, "maxLon": -105.0}, # Interior West
{"minLat": 42.0, "maxLat": 49.0, "minLon": -115.0, "maxLon": -105.0}, # Mountain North
# Mountain States
{"minLat": 32.0, "maxLat": 42.0, "minLon": -105.0, "maxLon": -95.0}, # CO, NM, parts of TX
{"minLat": 42.0, "maxLat": 49.0, "minLon": -105.0, "maxLon": -95.0}, # MT, ND, SD north
# Central US
{"minLat": 25.0, "maxLat": 35.0, "minLon": -105.0, "maxLon": -95.0}, # TX, southern states
{"minLat": 35.0, "maxLat": 42.0, "minLon": -95.0, "maxLon": -85.0}, # Central plains
{"minLat": 42.0, "maxLat": 49.0, "minLon": -95.0, "maxLon": -85.0}, # Upper Midwest
# Eastern US
{"minLat": 25.0, "maxLat": 35.0, "minLon": -95.0, "maxLon": -85.0}, # Southern states
{"minLat": 35.0, "maxLat": 42.0, "minLon": -85.0, "maxLon": -75.0}, # Mid-Atlantic
{"minLat": 42.0, "maxLat": 49.0, "minLon": -85.0, "maxLon": -75.0}, # Great Lakes
# East Coast
{"minLat": 25.0, "maxLat": 35.0, "minLon": -85.0, "maxLon": -75.0}, # FL, GA, SC, NC
{"minLat": 35.0, "maxLat": 42.0, "minLon": -75.0, "maxLon": -65.0}, # Mid-Atlantic coast
{"minLat": 42.0, "maxLat": 49.0, "minLon": -75.0, "maxLon": -65.0}, # New England
# Alaska - systematic coverage
{"minLat": 55.0, "maxLat": 65.0, "minLon": -170.0, "maxLon": -150.0}, # Western Alaska
{"minLat": 65.0, "maxLat": 72.0, "minLon": -170.0, "maxLon": -150.0}, # Northern Alaska
{"minLat": 55.0, "maxLat": 65.0, "minLon": -150.0, "maxLon": -130.0}, # Central Alaska
{"minLat": 65.0, "maxLat": 72.0, "minLon": -150.0, "maxLon": -130.0}, # North Central Alaska
# Hawaii
{"minLat": 18.0, "maxLat": 23.0, "minLon": -162.0, "maxLon": -154.0}, # Hawaiian Islands
# High-density urban refinement boxes (smaller areas for dense coverage)
# Major metropolitan areas - research shows these have multiple stations
{"minLat": 33.5, "maxLat": 34.5, "minLon": -118.8, "maxLon": -117.8}, # Los Angeles
{"minLat": 37.3, "maxLat": 38.0, "minLon": -122.8, "maxLon": -122.0}, # San Francisco Bay
{"minLat": 40.4, "maxLat": 41.0, "minLon": -74.5, "maxLon": -73.5}, # NYC Metro
{"minLat": 41.6, "maxLat": 42.2, "minLon": -88.0, "maxLon": -87.0}, # Chicago
{"minLat": 29.5, "maxLat": 30.2, "minLon": -95.8, "maxLon": -95.0}, # Houston
{"minLat": 32.5, "maxLat": 33.2, "minLon": -97.5, "maxLon": -96.5}, # Dallas-Fort Worth
{"minLat": 25.5, "maxLat": 26.2, "minLon": -80.8, "maxLon": -80.0}, # Miami
{"minLat": 33.6, "maxLat": 34.0, "minLon": -84.8, "maxLon": -84.0}, # Atlanta
{"minLat": 39.7, "maxLat": 40.2, "minLon": -75.5, "maxLon": -74.8}, # Philadelphia
{"minLat": 42.2, "maxLat": 42.6, "minLon": -71.3, "maxLon": -70.8}, # Boston
{"minLat": 47.4, "maxLat": 47.8, "minLon": -122.5, "maxLon": -122.0}, # Seattle
{"minLat": 38.7, "maxLat": 39.1, "minLon": -77.3, "maxLon": -76.8}, # Washington DC
{"minLat": 39.1, "maxLat": 39.4, "minLon": -76.8, "maxLon": -76.3}, # Baltimore
{"minLat": 42.2, "maxLat": 42.5, "minLon": -83.3, "maxLon": -82.8}, # Detroit
{"minLat": 44.7, "maxLat": 45.2, "minLon": -93.5, "maxLon": -93.0}, # Minneapolis
{"minLat": 29.9, "maxLat": 30.4, "minLon": -90.3, "maxLon": -89.8}, # New Orleans
{"minLat": 36.0, "maxLat": 36.4, "minLon": -86.0, "maxLon": -85.5}, # Nashville
{"minLat": 35.1, "maxLat": 35.4, "minLon": -81.0, "maxLon": -80.5}, # Charlotte
{"minLat": 39.0, "maxLat": 39.4, "minLon": -84.8, "maxLon": -84.3}, # Cincinnati
{"minLat": 41.3, "maxLat": 41.7, "minLon": -81.9, "maxLon": -81.4}, # Cleveland
{"minLat": 40.3, "maxLat": 40.7, "minLon": -80.2, "maxLon": -79.7}, # Pittsburgh
]
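            # The larger boxes above roughly tile the continental US (about 25-49N, 65-125W)
            # plus Alaska and Hawaii; the smaller metropolitan boxes add denser coverage
            # where monitoring stations tend to cluster.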
# Use the Monitoring Sites endpoint as identified in research
for i, bbox in enumerate(bounding_boxes):
try:
# Research finding: Use monitoring sites endpoint with bounding box
url = f"{self.base_url}/aq/data/monitoringSite/"
params = {
"format": "application/json",
"API_KEY": api_key,
"minLat": bbox["minLat"],
"maxLat": bbox["maxLat"],
"minLon": bbox["minLon"],
"maxLon": bbox["maxLon"]
}
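                    # Note (assumption): if this monitoringSite path is rejected for a given
                    # key, the publicly documented query at docs.airnowapi.org uses /aq/data/
                    # with a single BBOX=minLon,minLat,maxLon,maxLat parameter rather than the
                    # four min/max fields used here; the ZIP-code fallback below also covers
                    # that case.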
print(f"Querying bounding box {i+1}/{len(bounding_boxes)}: {bbox}")
response = requests.get(url, params=params, timeout=20)
if response.status_code == 200:
data = response.json()
if data:
print(f"Found {len(data)} monitoring sites in box {i+1}")
for site in data:
site['source_bbox'] = f"Box_{i+1}"
all_data.extend(data)
successful_requests += 1
else:
print(f"No data in box {i+1}")
else:
print(f"Error {response.status_code} for box {i+1}: {response.text[:100]}")
                    # AirNow allows about 500 requests per hour; this small set of box
                    # queries stays well within that, so only a short delay is needed
                    time.sleep(0.1)
except requests.exceptions.RequestException as e:
print(f"Request failed for box {i+1}: {str(e)}")
continue
print(f"Monitoring Sites endpoint: {len(all_data)} total records from {successful_requests} successful requests")
# If monitoring sites endpoint didn't work, fall back to current observations with ALL zip codes
if len(all_data) < 100:
print("Falling back to comprehensive ZIP code strategy...")
# Research insight: Cover ALL major population centers systematically
# Generate comprehensive ZIP code list covering entire US population
zip_codes = self.generate_comprehensive_zip_list()
                for i, zipcode in enumerate(zip_codes[:1000]):  # cap the fallback at 1,000 ZIP code queries
try:
url = f"{self.base_url}/aq/observation/zipCode/current/"
                        params = {
                            "format": "application/json",
                            "zipCode": zipcode,
                            "distance": 150,  # search radius in miles around the ZIP code
"API_KEY": api_key
}
response = requests.get(url, params=params, timeout=15)
if response.status_code == 200:
data = response.json()
if data:
for observation in data:
observation['source_zipcode'] = zipcode
all_data.extend(data)
successful_requests += 1
time.sleep(0.05) # Very fast processing
if i % 100 == 0:
print(f"Processed {i+1}/{len(zip_codes[:1000])} ZIP codes, found {len(all_data)} stations")
                except Exception:
                    # Skip this ZIP code if the request fails or the response can't be parsed
                    continue
print(f"Total data collected: {len(all_data)} records")
if not all_data:
                return [], "⚠️ No monitoring stations found. Please check your API key."
# Advanced deduplication from research - preserve maximum unique stations
unique_data = self.advanced_deduplication(all_data)
print(f"After advanced deduplication: {len(unique_data)} unique monitoring stations")
return unique_data, f"✅ Successfully loaded {len(unique_data)} monitoring stations from {successful_requests} API calls using systematic bounding box coverage"
except Exception as e:
print(f"General error: {str(e)}")
return [], f"❌ Error fetching data: {str(e)}"
    def generate_comprehensive_zip_list(self) -> List[str]:
        """Generate a broad ZIP code list covering major US population centers"""
        # Major metropolitan areas plus systematic statewide coverage
        zip_codes = [
            # ZIP codes from some of the largest metropolitan areas
            "90210", "90024", "91101", "91201", "90001", "90002", "90003",
"10001", "10002", "10003", "10019", "10021", "10022", "10023", "10024",
"60601", "60602", "60603", "60604", "60605", "60606", "60607", "60608",
"75201", "75202", "75203", "75204", "75205", "75206", "75207", "75208",
"33101", "33102", "33109", "33124", "33125", "33126", "33127", "33128",
"77001", "77002", "77003", "77004", "77005", "77006", "77007", "77008",
"30301", "30302", "30303", "30309", "30318", "30324", "30326", "30327",
"19101", "19102", "19103", "19104", "19106", "19107", "19123", "19146",
"85001", "85003", "85004", "85006", "85007", "85008", "85009", "85013",
"28201", "28202", "28203", "28204", "28205", "28206", "28207", "28208",
            # State capitals and major cities in states not covered above
"99501", "99502", "99503", "99504", "99507", "99508", "99515", "99577", # Alaska
"96801", "96802", "96813", "96814", "96815", "96816", "96817", "96818", # Hawaii
"83701", "83702", "83703", "83704", "83705", "83706", "83709", "83712", # Idaho
"59601", "59602", "59718", "59724", "59801", "59802", "59803", "59808", # Montana
"82001", "82009", "82601", "82602", "82604", "82605", "82609", "82633", # Wyoming
"58501", "58502", "58503", "58504", "58701", "58702", "58703", "58704", # North Dakota
"57501", "57701", "57702", "57703", "57104", "57105", "57106", "57197", # South Dakota
# Add systematic coverage for remaining areas
]
        # Add a coarse systematic grid of additional candidate ZIP codes.
        # The format below always yields five digits (e.g. "01001", "01051", ...);
        # codes that do not exist simply return no data from the API.
        additional_zips = []
        for prefix in range(1, 100):           # two-digit ZIP prefix
            for suffix in range(1, 1000, 50):  # every 50th three-digit suffix
                additional_zips.append(f"{prefix:02d}{suffix:03d}")
        return zip_codes + additional_zips[:500]  # priority ZIPs first, then the systematic grid
    def advanced_deduplication(self, data: List[Dict]) -> List[Dict]:
        """Deduplicate records while preserving co-located monitors from different agencies and parameters"""
seen_stations = set()
unique_data = []
for item in data:
# Create highly specific key to avoid over-deduplication
station_key = (
round(item.get('Latitude', 0), 6), # Very precise location
round(item.get('Longitude', 0), 6),
item.get('ParameterName', ''),
item.get('AgencyName', ''), # Different agencies may have co-located monitors
item.get('SiteName', ''), # Site-specific identification
item.get('MonitorType', '') # Different monitor types
)
if station_key not in seen_stations:
seen_stations.add(station_key)
unique_data.append(item)
return unique_data
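    # Illustrative example: two records at identical coordinates but reported by
    # different agencies (e.g. a state agency and the EPA) yield distinct keys above,
    # so both are kept as separate stations rather than collapsed into one.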
def create_map(self, data: List[Dict]) -> str:
"""Create an interactive map with air quality data"""
if not data:
# Create a basic US map if no data
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
folium.Marker(
[39.8283, -98.5795],
popup="No data available. Please check your API key.",
icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)
return m._repr_html_()
# Calculate center point of all data
lats = [item['Latitude'] for item in data if 'Latitude' in item]
lons = [item['Longitude'] for item in data if 'Longitude' in item]
if lats and lons:
center_lat = sum(lats) / len(lats)
center_lon = sum(lons) / len(lons)
else:
center_lat, center_lon = 39.8283, -98.5795 # Center of US
# Create map
m = folium.Map(location=[center_lat, center_lon], zoom_start=4)
# Add markers for each monitoring location
for item in data:
try:
lat = item.get('Latitude')
lon = item.get('Longitude')
aqi = item.get('AQI', 0)
parameter = item.get('ParameterName', 'Unknown')
area = item.get('ReportingArea', 'Unknown Area')
state = item.get('StateCode', 'Unknown')
category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi))
if lat is None or lon is None:
continue
# Get color based on AQI category
color = self.get_aqi_color(category)
# Create popup content
popup_content = f"""
<div style="width: 200px;">
<h4>{area}, {state}</h4>
<p><b>AQI:</b> {aqi} ({category})</p>
<p><b>Parameter:</b> {parameter}</p>
<p><b>Location:</b> {lat:.3f}, {lon:.3f}</p>
<p><b>Last Updated:</b> {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00</p>
</div>
"""
                # Map the AQI value to one of folium's supported marker colors
                # (folium markers have no 'yellow', so 'beige' stands in for the Moderate band)
                if aqi <= 50:
                    marker_color = 'green'
                elif aqi <= 100:
                    marker_color = 'beige'
                elif aqi <= 150:
                    marker_color = 'orange'
                elif aqi <= 200:
                    marker_color = 'red'
                elif aqi <= 300:
                    marker_color = 'purple'
                else:
                    marker_color = 'darkred'
# Add marker
folium.Marker(
[lat, lon],
popup=folium.Popup(popup_content, max_width=250),
tooltip=f"{area}: AQI {aqi}",
icon=folium.Icon(color=marker_color, icon='cloud')
).add_to(m)
            except Exception:
                continue  # Skip markers with missing or malformed fields
# Add legend
legend_html = """
<div style="position: fixed;
bottom: 50px; left: 50px; width: 150px; height: 180px;
background-color: white; border:2px solid grey; z-index:9999;
font-size:14px; padding: 10px">
<h4>AQI Legend</h4>
<p><i class="fa fa-circle" style="color:green"></i> Good (0-50)</p>
        <p><i class="fa fa-circle" style="color:beige"></i> Moderate (51-100)</p>
<p><i class="fa fa-circle" style="color:orange"></i> Unhealthy for Sensitive (101-150)</p>
<p><i class="fa fa-circle" style="color:red"></i> Unhealthy (151-200)</p>
<p><i class="fa fa-circle" style="color:purple"></i> Very Unhealthy (201-300)</p>
<p><i class="fa fa-circle" style="color:darkred"></i> Hazardous (301+)</p>
</div>
"""
m.get_root().html.add_child(folium.Element(legend_html))
return m._repr_html_()
def create_data_table(self, data: List[Dict]) -> pd.DataFrame:
"""Create a data table from the air quality data"""
if not data:
return pd.DataFrame()
# Extract relevant columns
table_data = []
for item in data:
table_data.append({
'Reporting Area': item.get('ReportingArea', 'Unknown'),
'State': item.get('StateCode', 'Unknown'),
'AQI': item.get('AQI', 0),
'Category': item.get('Category', {}).get('Name', self.get_aqi_category(item.get('AQI', 0))),
'Parameter': item.get('ParameterName', 'Unknown'),
'Date': item.get('DateObserved', 'Unknown'),
'Hour': item.get('HourObserved', 'Unknown'),
'Latitude': item.get('Latitude', 'Unknown'),
'Longitude': item.get('Longitude', 'Unknown')
})
df = pd.DataFrame(table_data)
return df.sort_values('AQI', ascending=False)
# Initialize the mapper
mapper = AirQualityMapper()
# Check environment variable on startup
env_api_key = os.getenv('AIRNOW_API_KEY')
print(f"Environment variable AIRNOW_API_KEY: {'SET' if env_api_key else 'NOT SET'}")
if env_api_key:
print(f"API key starts with: {env_api_key[:8]}...")
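# The key can be provided through the environment before launch, for example:
#   export AIRNOW_API_KEY="your-airnow-key"   # illustrative placeholder, not a real key
# or as a secret named AIRNOW_API_KEY in the hosting environment (e.g. a Hugging Face Space).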
def update_map(api_key: str):
"""Update the map with fresh air quality data"""
# Check for environment variable first, then use provided key
env_api_key = os.getenv('AIRNOW_API_KEY')
if env_api_key:
api_key = env_api_key
if not api_key.strip():
return "Please enter your AirNow API key above or set AIRNOW_API_KEY environment variable.", pd.DataFrame(), "❌ No API key provided"
# Fetch data
data, status = mapper.fetch_airnow_data(api_key)
# Create map
map_html = mapper.create_map(data)
# Create data table
df = mapper.create_data_table(data)
return map_html, df, status
# Create Gradio interface
with gr.Blocks(title="AirNow Air Quality Sensor Map", theme=gr.themes.Soft()) as demo:
# Check if API key is set as environment variable
env_api_key = os.getenv('AIRNOW_API_KEY')
api_key_status = "✅ API key loaded from environment variable" if env_api_key else "⚠️ No environment variable set"
gr.Markdown(
f"""
# 🌬️ AirNow Air Quality Sensor Map
**API Key Status**: {api_key_status}
This interactive map displays real-time air quality data from EPA's AirNow network of over 2,000 monitoring stations across the United States.
## How to use:
1. **API Key**: {"API key is already configured via environment variable" if env_api_key else "Enter your API key below or set AIRNOW_API_KEY environment variable"}
        2. **Click "Load Air Quality Data"** to fetch current readings from monitoring stations nationwide
3. **Explore the map**: Click on markers to see detailed information about each monitoring station
## Enhanced Coverage:
        - **Systematic Grid Search**: Queries regional and metropolitan bounding boxes covering the continental US, Alaska, and Hawaii
        - **ZIP-Code Fallback**: Falls back to hundreds of ZIP-code queries (150-mile search radius) if the bounding-box queries return little data
        - **Minimal Deduplication**: Preserves co-located monitors reported by different agencies or for different parameters
        - **Fast Collection**: Short (0.05-0.1 second) delays between requests to stay within the API rate limit
**⚠️ Note**: This data is preliminary and should not be used for regulatory decisions. For official data, visit [EPA's AirData](https://www.epa.gov/outdoor-air-quality-data).
"""
)
with gr.Row():
with gr.Column(scale=3):
api_key_input = gr.Textbox(
label="AirNow API Key (Optional if environment variable is set)",
placeholder="Enter your AirNow API key here..." if not env_api_key else "Using environment variable AIRNOW_API_KEY",
type="password",
info="Get your free API key at docs.airnowapi.org" if not env_api_key else "Environment variable is being used",
value="" if not env_api_key else "Environment variable configured",
interactive=not bool(env_api_key)
)
with gr.Column(scale=1):
load_button = gr.Button("Load Air Quality Data", variant="primary", size="lg")
status_text = gr.Markdown("Click 'Load Air Quality Data' to begin." if env_api_key else "Enter your API key and click 'Load Air Quality Data' to begin.")
with gr.Tabs():
with gr.TabItem("Interactive Map"):
map_output = gr.HTML(label="Air Quality Map")
with gr.TabItem("Data Table"):
data_table = gr.Dataframe(
label="Air Quality Monitoring Stations",
interactive=False
)
gr.Markdown(
"""
## AQI Health Guidelines:
- **Good (0-50)**: Air quality is satisfactory for everyone
- **Moderate (51-100)**: Air quality is acceptable for most people
- **Unhealthy for Sensitive Groups (101-150)**: Members of sensitive groups may experience health effects
- **Unhealthy (151-200)**: Everyone may begin to experience health effects
        - **Very Unhealthy (201-300)**: Health alert - everyone may experience more serious health effects
        - **Hazardous (301+)**: Health warning of emergency conditions - the entire population is more likely to be affected
## Data Sources:
- **AirNow API**: Real-time air quality data from EPA's monitoring network
- **Monitoring Agencies**: 120+ local, state, tribal, and federal government agencies
- **Update Frequency**: Hourly observations, daily forecasts
## Links:
- [AirNow.gov](https://www.airnow.gov) - Official air quality information
- [AirNow API Documentation](https://docs.airnowapi.org/) - API documentation and registration
- [EPA AirData](https://www.epa.gov/outdoor-air-quality-data) - Official regulatory air quality data
"""
)
# Set up event handler
load_button.click(
fn=update_map,
inputs=[api_key_input],
outputs=[map_output, data_table, status_text]
)
# Launch the app
if __name__ == "__main__":
demo.launch()
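    # demo.launch() serves the app locally; Gradio's optional share=True argument can
    # additionally create a temporary public link if remote access is needed.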