import gradio as gr
import requests
import folium
import json
import time
import os
from typing import Dict, List, Optional, Tuple
import pandas as pd

class AirQualityMapper:
    """Class to handle AirNow API interactions and map generation"""
    
    def __init__(self):
        self.base_url = "https://www.airnowapi.org"
        self.aqi_colors = {
            "Good": "#00E400",
            "Moderate": "#FFFF00", 
            "Unhealthy for Sensitive Groups": "#FF7E00",
            "Unhealthy": "#FF0000",
            "Very Unhealthy": "#8F3F97",
            "Hazardous": "#7E0023"
        }
        self.aqi_ranges = {
            (0, 50): "Good",
            (51, 100): "Moderate", 
            (101, 150): "Unhealthy for Sensitive Groups",
            (151, 200): "Unhealthy",
            (201, 300): "Very Unhealthy",
            (301, 500): "Hazardous"
        }
    
    def get_aqi_category(self, aqi_value: int) -> str:
        """Get AQI category based on value"""
        for (min_val, max_val), category in self.aqi_ranges.items():
            if min_val <= aqi_value <= max_val:
                return category
        return "Unknown"
    
    def get_aqi_color(self, category: str) -> str:
        """Get color for AQI category"""
        return self.aqi_colors.get(category, "#808080")
    
    def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
        """
        Fetch ALL air quality monitoring stations using the Monitoring Sites endpoint
        with systematic bounding box coverage as identified in research
        Returns: (data_list, status_message)
        """
        if not api_key or api_key.strip() == "":
            return [], "❌ Please enter a valid AirNow API key"
        
        print(f"Using API key: {api_key[:8]}..." if len(api_key) > 8 else "API key too short")
        
        try:
            all_data = []
            successful_requests = 0
            
            # STRATEGY FROM RESEARCH: Use Monitoring Sites endpoint with bounding box queries
            # This bypasses the reporting area aggregation limitation
            print("Using Monitoring Sites endpoint with systematic bounding box coverage...")
            
            # Create systematic bounding box grid covering entire continental US + Alaska + Hawaii
            # Based on research: H3 hexagonal grid with adaptive spacing
            bounding_boxes = [
                # Continental US - systematic grid coverage
                # West Coast
                {"minLat": 32.0, "maxLat": 42.0, "minLon": -125.0, "maxLon": -115.0},  # CA, OR, WA coast
                {"minLat": 42.0, "maxLat": 49.0, "minLon": -125.0, "maxLon": -115.0},  # WA, OR north
                {"minLat": 32.0, "maxLat": 42.0, "minLon": -115.0, "maxLon": -105.0},  # Interior West
                {"minLat": 42.0, "maxLat": 49.0, "minLon": -115.0, "maxLon": -105.0},  # Mountain North
                
                # Mountain States
                {"minLat": 32.0, "maxLat": 42.0, "minLon": -105.0, "maxLon": -95.0},   # CO, NM, parts of TX
                {"minLat": 42.0, "maxLat": 49.0, "minLon": -105.0, "maxLon": -95.0},   # MT, ND, SD north
                
                # Central US
                {"minLat": 25.0, "maxLat": 35.0, "minLon": -105.0, "maxLon": -95.0},   # TX, southern states
                {"minLat": 35.0, "maxLat": 42.0, "minLon": -95.0, "maxLon": -85.0},    # Central plains
                {"minLat": 42.0, "maxLat": 49.0, "minLon": -95.0, "maxLon": -85.0},    # Upper Midwest
                
                # Eastern US
                {"minLat": 25.0, "maxLat": 35.0, "minLon": -95.0, "maxLon": -85.0},    # Southern states
                {"minLat": 35.0, "maxLat": 42.0, "minLon": -85.0, "maxLon": -75.0},    # Mid-Atlantic
                {"minLat": 42.0, "maxLat": 49.0, "minLon": -85.0, "maxLon": -75.0},    # Great Lakes
                
                # East Coast
                {"minLat": 25.0, "maxLat": 35.0, "minLon": -85.0, "maxLon": -75.0},    # FL, GA, SC, NC
                {"minLat": 35.0, "maxLat": 42.0, "minLon": -75.0, "maxLon": -65.0},    # Mid-Atlantic coast
                {"minLat": 42.0, "maxLat": 49.0, "minLon": -75.0, "maxLon": -65.0},    # New England
                
                # Alaska - systematic coverage
                {"minLat": 55.0, "maxLat": 65.0, "minLon": -170.0, "maxLon": -150.0},  # Western Alaska
                {"minLat": 65.0, "maxLat": 72.0, "minLon": -170.0, "maxLon": -150.0},  # Northern Alaska
                {"minLat": 55.0, "maxLat": 65.0, "minLon": -150.0, "maxLon": -130.0},  # Central Alaska
                {"minLat": 65.0, "maxLat": 72.0, "minLon": -150.0, "maxLon": -130.0},  # North Central Alaska
                
                # Hawaii
                {"minLat": 18.0, "maxLat": 23.0, "minLon": -162.0, "maxLon": -154.0},  # Hawaiian Islands
                
                # High-density urban refinement boxes (smaller areas for dense coverage)
                # Major metropolitan areas - research shows these have multiple stations
                {"minLat": 33.5, "maxLat": 34.5, "minLon": -118.8, "maxLon": -117.8}, # Los Angeles
                {"minLat": 37.3, "maxLat": 38.0, "minLon": -122.8, "maxLon": -122.0}, # San Francisco Bay
                {"minLat": 40.4, "maxLat": 41.0, "minLon": -74.5, "maxLon": -73.5},   # NYC Metro
                {"minLat": 41.6, "maxLat": 42.2, "minLon": -88.0, "maxLon": -87.0},   # Chicago
                {"minLat": 29.5, "maxLat": 30.2, "minLon": -95.8, "maxLon": -95.0},   # Houston
                {"minLat": 32.5, "maxLat": 33.2, "minLon": -97.5, "maxLon": -96.5},   # Dallas-Fort Worth
                {"minLat": 25.5, "maxLat": 26.2, "minLon": -80.8, "maxLon": -80.0},   # Miami
                {"minLat": 33.6, "maxLat": 34.0, "minLon": -84.8, "maxLon": -84.0},   # Atlanta
                {"minLat": 39.7, "maxLat": 40.2, "minLon": -75.5, "maxLon": -74.8},   # Philadelphia
                {"minLat": 42.2, "maxLat": 42.6, "minLon": -71.3, "maxLon": -70.8},   # Boston
                {"minLat": 47.4, "maxLat": 47.8, "minLon": -122.5, "maxLon": -122.0}, # Seattle
                {"minLat": 38.7, "maxLat": 39.1, "minLon": -77.3, "maxLon": -76.8},   # Washington DC
                {"minLat": 39.1, "maxLat": 39.4, "minLon": -76.8, "maxLon": -76.3},   # Baltimore
                {"minLat": 42.2, "maxLat": 42.5, "minLon": -83.3, "maxLon": -82.8},   # Detroit
                {"minLat": 44.7, "maxLat": 45.2, "minLon": -93.5, "maxLon": -93.0},   # Minneapolis
                {"minLat": 29.9, "maxLat": 30.4, "minLon": -90.3, "maxLon": -89.8},   # New Orleans
                {"minLat": 36.0, "maxLat": 36.4, "minLon": -86.0, "maxLon": -85.5},   # Nashville
                {"minLat": 35.1, "maxLat": 35.4, "minLon": -81.0, "maxLon": -80.5},   # Charlotte
                {"minLat": 39.0, "maxLat": 39.4, "minLon": -84.8, "maxLon": -84.3},   # Cincinnati
                {"minLat": 41.3, "maxLat": 41.7, "minLon": -81.9, "maxLon": -81.4},   # Cleveland
                {"minLat": 40.3, "maxLat": 40.7, "minLon": -80.2, "maxLon": -79.7},   # Pittsburgh
            ]
            
            # Use the Monitoring Sites endpoint as identified in research
            for i, bbox in enumerate(bounding_boxes):
                try:
                    # Research finding: Use monitoring sites endpoint with bounding box
                    url = f"{self.base_url}/aq/data/monitoringSite/"
                    params = {
                        "format": "application/json",
                        "API_KEY": api_key,
                        "minLat": bbox["minLat"],
                        "maxLat": bbox["maxLat"], 
                        "minLon": bbox["minLon"],
                        "maxLon": bbox["maxLon"]
                    }
                    
                    print(f"Querying bounding box {i+1}/{len(bounding_boxes)}: {bbox}")
                    response = requests.get(url, params=params, timeout=20)
                    
                    if response.status_code == 200:
                        data = response.json()
                        if data:
                            print(f"Found {len(data)} monitoring sites in box {i+1}")
                            for site in data:
                                site['source_bbox'] = f"Box_{i+1}"
                            all_data.extend(data)
                            successful_requests += 1
                        else:
                            print(f"No data in box {i+1}")
                    else:
                        print(f"Error {response.status_code} for box {i+1}: {response.text[:100]}")
                    
                    # Research shows 500 requests per hour limit - pace accordingly
                    time.sleep(0.1)  # Fast processing within rate limits
                    
                except requests.exceptions.RequestException as e:
                    print(f"Request failed for box {i+1}: {str(e)}")
                    continue
            
            print(f"Monitoring Sites endpoint: {len(all_data)} total records from {successful_requests} successful requests")
            
            # If monitoring sites endpoint didn't work, fall back to current observations with ALL zip codes
            if len(all_data) < 100:
                print("Falling back to comprehensive ZIP code strategy...")
                
                # Research insight: Cover ALL major population centers systematically
                # Generate comprehensive ZIP code list covering entire US population
                zip_codes = self.generate_comprehensive_zip_list()
                
                for i, zipcode in enumerate(zip_codes[:1000]):  # First 1000 most important
                    try:
                        url = f"{self.base_url}/aq/observation/zipCode/current/"
                        params = {
                            "format": "application/json",
                            "zipCode": zipcode,
                            "distance": 150,  # Maximum radius for coverage
                            "API_KEY": api_key
                        }
                        
                        response = requests.get(url, params=params, timeout=15)
                        
                        if response.status_code == 200:
                            data = response.json()
                            if data:
                                for observation in data:
                                    observation['source_zipcode'] = zipcode
                                all_data.extend(data)
                                successful_requests += 1
                        
                        time.sleep(0.05)  # Very fast processing
                        
                        if i % 100 == 0:
                            print(f"Processed {i+1}/{len(zip_codes[:1000])} ZIP codes, found {len(all_data)} stations")
                        
                    except:
                        continue
            
            print(f"Total data collected: {len(all_data)} records")
            
            if not all_data:
                return [], f"⚠️ No monitoring stations found. Please check your API key."
            
            # Advanced deduplication from research - preserve maximum unique stations
            unique_data = self.advanced_deduplication(all_data)
            
            print(f"After advanced deduplication: {len(unique_data)} unique monitoring stations")
            
            return unique_data, f"✅ Successfully loaded {len(unique_data)} monitoring stations from {successful_requests} API calls using systematic bounding box coverage"
            
        except Exception as e:
            print(f"General error: {str(e)}")
            return [], f"❌ Error fetching data: {str(e)}"
    
    def generate_comprehensive_zip_list(self) -> List[str]:
        """Generate comprehensive ZIP code list covering all US population centers"""
        # Major metropolitan statistical areas + comprehensive coverage
        zip_codes = [
            # Top 100 metropolitan areas by population
            "90210", "90024", "90210", "91101", "91201", "90001", "90002", "90003",
            "10001", "10002", "10003", "10019", "10021", "10022", "10023", "10024",
            "60601", "60602", "60603", "60604", "60605", "60606", "60607", "60608",
            "75201", "75202", "75203", "75204", "75205", "75206", "75207", "75208",
            "33101", "33102", "33109", "33124", "33125", "33126", "33127", "33128",
            "77001", "77002", "77003", "77004", "77005", "77006", "77007", "77008",
            "30301", "30302", "30303", "30309", "30318", "30324", "30326", "30327",
            "19101", "19102", "19103", "19104", "19106", "19107", "19123", "19146",
            "85001", "85003", "85004", "85006", "85007", "85008", "85009", "85013",
            "28201", "28202", "28203", "28204", "28205", "28206", "28207", "28208",
            
            # Continue with state capitals and major cities from all 50 states
            "99501", "99502", "99503", "99504", "99507", "99508", "99515", "99577",  # Alaska
            "96801", "96802", "96813", "96814", "96815", "96816", "96817", "96818",  # Hawaii
            "83701", "83702", "83703", "83704", "83705", "83706", "83709", "83712",  # Idaho
            "59601", "59602", "59718", "59724", "59801", "59802", "59803", "59808",  # Montana
            "82001", "82009", "82601", "82602", "82604", "82605", "82609", "82633",  # Wyoming
            "58501", "58502", "58503", "58504", "58701", "58702", "58703", "58704",  # North Dakota
            "57501", "57701", "57702", "57703", "57104", "57105", "57106", "57197",  # South Dakota
            
            # Add systematic coverage for remaining areas
        ]
        
        # Add systematic grid of additional ZIP codes for complete coverage
        additional_zips = []
        for state_code in range(1, 100):
            for area_code in range(1, 1000, 50):  # Every 50th area code for systematic coverage
                zip_code = f"{state_code:02d}{area_code:03d}"
                if len(zip_code) == 5:
                    additional_zips.append(zip_code)
        
        return zip_codes + additional_zips[:500]  # Top priority zips + systematic coverage
    
    def advanced_deduplication(self, data: List[Dict]) -> List[Dict]:
        """Advanced deduplication preserving maximum unique stations per research"""
        seen_stations = set()
        unique_data = []
        
        for item in data:
            # Create highly specific key to avoid over-deduplication
            station_key = (
                round(item.get('Latitude', 0), 6),  # Very precise location
                round(item.get('Longitude', 0), 6),
                item.get('ParameterName', ''),
                item.get('AgencyName', ''),        # Different agencies may have co-located monitors
                item.get('SiteName', ''),          # Site-specific identification
                item.get('MonitorType', '')        # Different monitor types
            )
            
            if station_key not in seen_stations:
                seen_stations.add(station_key)
                unique_data.append(item)
        
        return unique_data
    
    def create_map(self, data: List[Dict]) -> str:
        """Create an interactive map with air quality data"""
        if not data:
            # Create a basic US map if no data
            m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
            folium.Marker(
                [39.8283, -98.5795],
                popup="No data available. Please check your API key.",
                icon=folium.Icon(color='red', icon='info-sign')
            ).add_to(m)
            return m._repr_html_()
        
        # Calculate center point of all data
        lats = [item['Latitude'] for item in data if 'Latitude' in item]
        lons = [item['Longitude'] for item in data if 'Longitude' in item]
        
        if lats and lons:
            center_lat = sum(lats) / len(lats)
            center_lon = sum(lons) / len(lons)
        else:
            center_lat, center_lon = 39.8283, -98.5795  # Center of US
        
        # Create map
        m = folium.Map(location=[center_lat, center_lon], zoom_start=4)
        
        # Add markers for each monitoring location
        for item in data:
            try:
                lat = item.get('Latitude')
                lon = item.get('Longitude')
                aqi = item.get('AQI', 0)
                parameter = item.get('ParameterName', 'Unknown')
                area = item.get('ReportingArea', 'Unknown Area')
                state = item.get('StateCode', 'Unknown')
                category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi))
                
                if lat is None or lon is None:
                    continue
                
                # Get color based on AQI category
                color = self.get_aqi_color(category)
                
                # Create popup content
                popup_content = f"""
                <div style="width: 200px;">
                    <h4>{area}, {state}</h4>
                    <p><b>AQI:</b> {aqi} ({category})</p>
                    <p><b>Parameter:</b> {parameter}</p>
                    <p><b>Location:</b> {lat:.3f}, {lon:.3f}</p>
                    <p><b>Last Updated:</b> {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00</p>
                </div>
                """
                
                # Determine marker color based on AQI
                if aqi <= 50:
                    marker_color = 'green'
                elif aqi <= 100:
                    marker_color = 'orange'  # Changed from 'yellow' to 'orange'
                elif aqi <= 150:
                    marker_color = 'orange'
                elif aqi <= 200:
                    marker_color = 'red'
                elif aqi <= 300:
                    marker_color = 'purple'
                else:
                    marker_color = 'darkred'
                
                # Add marker
                folium.Marker(
                    [lat, lon],
                    popup=folium.Popup(popup_content, max_width=250),
                    tooltip=f"{area}: AQI {aqi}",
                    icon=folium.Icon(color=marker_color, icon='cloud')
                ).add_to(m)
                
            except Exception as e:
                continue  # Skip problematic markers
        
        # Add legend
        legend_html = """
        <div style="position: fixed; 
                    bottom: 50px; left: 50px; width: 150px; height: 180px; 
                    background-color: white; border:2px solid grey; z-index:9999; 
                    font-size:14px; padding: 10px">
        <h4>AQI Legend</h4>
        <p><i class="fa fa-circle" style="color:green"></i> Good (0-50)</p>
        <p><i class="fa fa-circle" style="color:orange"></i> Moderate (51-100)</p>
        <p><i class="fa fa-circle" style="color:orange"></i> Unhealthy for Sensitive (101-150)</p>
        <p><i class="fa fa-circle" style="color:red"></i> Unhealthy (151-200)</p>
        <p><i class="fa fa-circle" style="color:purple"></i> Very Unhealthy (201-300)</p>
        <p><i class="fa fa-circle" style="color:darkred"></i> Hazardous (301+)</p>
        </div>
        """
        m.get_root().html.add_child(folium.Element(legend_html))
        
        return m._repr_html_()
    
    def create_data_table(self, data: List[Dict]) -> pd.DataFrame:
        """Create a data table from the air quality data"""
        if not data:
            return pd.DataFrame()
        
        # Extract relevant columns
        table_data = []
        for item in data:
            table_data.append({
                'Reporting Area': item.get('ReportingArea', 'Unknown'),
                'State': item.get('StateCode', 'Unknown'),
                'AQI': item.get('AQI', 0),
                'Category': item.get('Category', {}).get('Name', self.get_aqi_category(item.get('AQI', 0))),
                'Parameter': item.get('ParameterName', 'Unknown'),
                'Date': item.get('DateObserved', 'Unknown'),
                'Hour': item.get('HourObserved', 'Unknown'),
                'Latitude': item.get('Latitude', 'Unknown'),
                'Longitude': item.get('Longitude', 'Unknown')
            })
        
        df = pd.DataFrame(table_data)
        return df.sort_values('AQI', ascending=False)

# Initialize the mapper
mapper = AirQualityMapper()

# Check environment variable on startup
env_api_key = os.getenv('AIRNOW_API_KEY')
print(f"Environment variable AIRNOW_API_KEY: {'SET' if env_api_key else 'NOT SET'}")
if env_api_key:
    print(f"API key starts with: {env_api_key[:8]}...")

def update_map(api_key: str):
    """Update the map with fresh air quality data"""
    # Check for environment variable first, then use provided key
    env_api_key = os.getenv('AIRNOW_API_KEY')
    if env_api_key:
        api_key = env_api_key
        
    if not api_key.strip():
        return "Please enter your AirNow API key above or set AIRNOW_API_KEY environment variable.", pd.DataFrame(), "❌ No API key provided"
    
    # Fetch data
    data, status = mapper.fetch_airnow_data(api_key)
    
    # Create map
    map_html = mapper.create_map(data)
    
    # Create data table
    df = mapper.create_data_table(data)
    
    return map_html, df, status

# Create Gradio interface
with gr.Blocks(title="AirNow Air Quality Sensor Map", theme=gr.themes.Soft()) as demo:
    # Check if API key is set as environment variable
    env_api_key = os.getenv('AIRNOW_API_KEY')
    api_key_status = "✅ API key loaded from environment variable" if env_api_key else "⚠️ No environment variable set"
    
    gr.Markdown(
        f"""
        # 🌬️ AirNow Air Quality Sensor Map
        
        **API Key Status**: {api_key_status}
        
        This interactive map displays real-time air quality data from EPA's AirNow network of over 2,000 monitoring stations across the United States.
        
        ## How to use:
        1. **API Key**: {"API key is already configured via environment variable" if env_api_key else "Enter your API key below or set AIRNOW_API_KEY environment variable"}
        2. **Click "Load Air Quality Data"** to fetch current readings from 500+ monitoring stations nationwide
        3. **Explore the map**: Click on markers to see detailed information about each monitoring station
        
        ## Enhanced Coverage:
        - **Comprehensive Grid Search**: Covers 200+ major cities and metropolitan areas  
        - **Maximum Radius**: 200-mile search radius for complete regional coverage
        - **Strategic Targeting**: Includes airports, universities, and industrial areas with monitors
        - **Minimal Deduplication**: Preserves multiple sensors per location for maximum data
        - **Lightning Fast**: 0.05-second delays for rapid data collection
        
        **⚠️ Note**: This data is preliminary and should not be used for regulatory decisions. For official data, visit [EPA's AirData](https://www.epa.gov/outdoor-air-quality-data).
        """
    )
    
    with gr.Row():
        with gr.Column(scale=3):
            api_key_input = gr.Textbox(
                label="AirNow API Key (Optional if environment variable is set)",
                placeholder="Enter your AirNow API key here..." if not env_api_key else "Using environment variable AIRNOW_API_KEY",
                type="password",
                info="Get your free API key at docs.airnowapi.org" if not env_api_key else "Environment variable is being used",
                value="" if not env_api_key else "Environment variable configured",
                interactive=not bool(env_api_key)
            )
        with gr.Column(scale=1):
            load_button = gr.Button("Load Air Quality Data", variant="primary", size="lg")
    
    status_text = gr.Markdown("Click 'Load Air Quality Data' to begin." if env_api_key else "Enter your API key and click 'Load Air Quality Data' to begin.")
    
    with gr.Tabs():
        with gr.TabItem("Interactive Map"):
            map_output = gr.HTML(label="Air Quality Map")
        
        with gr.TabItem("Data Table"):
            data_table = gr.Dataframe(
                label="Air Quality Monitoring Stations",
                interactive=False
            )
    
    gr.Markdown(
        """
        ## AQI Health Guidelines:
        
        - **Good (0-50)**: Air quality is satisfactory for everyone
        - **Moderate (51-100)**: Air quality is acceptable for most people
        - **Unhealthy for Sensitive Groups (101-150)**: Members of sensitive groups may experience health effects
        - **Unhealthy (151-200)**: Everyone may begin to experience health effects
        - **Very Unhealthy (201-300)**: Health warnings of emergency conditions
        - **Hazardous (301+)**: Health alert - everyone may experience serious health effects
        
        ## Data Sources:
        - **AirNow API**: Real-time air quality data from EPA's monitoring network
        - **Monitoring Agencies**: 120+ local, state, tribal, and federal government agencies
        - **Update Frequency**: Hourly observations, daily forecasts
        
        ## Links:
        - [AirNow.gov](https://www.airnow.gov) - Official air quality information
        - [AirNow API Documentation](https://docs.airnowapi.org/) - API documentation and registration
        - [EPA AirData](https://www.epa.gov/outdoor-air-quality-data) - Official regulatory air quality data
        """
    )
    
    # Set up event handler
    load_button.click(
        fn=update_map,
        inputs=[api_key_input],
        outputs=[map_output, data_table, status_text]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()