Spaces:

nakas
/

Air-quality-Monitoring-sensor

Running

App Files Community

nakas committed about 22 hours ago

Commit

b0c2c45

verified ·

1 Parent(s): b21ff10

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -637

app.py CHANGED Viewed

@@ -1,694 +1,241 @@
-import gradio as gr
 import requests
-import folium
-import json
-import time
-import os
-from typing import Dict, List, Optional, Tuple
 import pandas as pd
-class AirQualityMapper:
-    """Class to handle AirNow API interactions and map generation"""
     def __init__(self):
-        self.base_url = "https://www.airnowapi.org"
-        self.aqi_colors = {
-            "Good": "#00E400",
-            "Moderate": "#FFFF00",
-            "Unhealthy for Sensitive Groups": "#FF7E00",
-            "Unhealthy": "#FF0000",
-            "Very Unhealthy": "#8F3F97",
-            "Hazardous": "#7E0023"
-        }
-        self.aqi_ranges = {
-            (0, 50): "Good",
-            (51, 100): "Moderate",
-            (101, 150): "Unhealthy for Sensitive Groups",
-            (151, 200): "Unhealthy",
-            (201, 300): "Very Unhealthy",
-            (301, 500): "Hazardous"
-        }
-    def get_aqi_category(self, aqi_value: int) -> str:
-        """Get AQI category based on value"""
-        for (min_val, max_val), category in self.aqi_ranges.items():
-            if min_val <= aqi_value <= max_val:
-                return category
-        return "Unknown"
-    def get_aqi_color(self, category: str) -> str:
-        """Get color for AQI category"""
-        return self.aqi_colors.get(category, "#808080")
-    def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
-        """
-        DIRECT ACCESS: Get ALL monitoring stations from hourly data files
-        Returns: (data_list, status_message)
-        """
-        if not api_key or api_key.strip() == "":
-            return [], "❌ Please enter a valid AirNow API key"
-        print(f"🎯 DIRECT FILE ACCESS: Grabbing hourly data files with ALL monitoring sites...")
-        try:
-            all_data = []
-            # STRATEGY 1: Access hourly data files directly
-            print("📁 STRATEGY 1: Accessing bulk hourly data files...")
-            hourly_data = self.get_hourly_data_files(api_key)
-            all_data.extend(hourly_data)
-            print(f"Hourly files found: {len(hourly_data)} station records")
-            # STRATEGY 2: Try file products mentioned in research
-            print("📁 STRATEGY 2: Accessing file products and bulk endpoints...")
-            file_data = self.get_file_products(api_key)
-            all_data.extend(file_data)
-            print(f"File products found: {len(file_data)} additional records")
-            # STRATEGY 3: Try data dumps and bulk exports
-            print("📁 STRATEGY 3: Accessing data dumps and exports...")
-            export_data = self.get_bulk_exports(api_key)
-            all_data.extend(export_data)
-            print(f"Bulk exports found: {len(export_data)} additional records")
-            # STRATEGY 4: Parse any CSV/XML/JSON data files
-            print("📁 STRATEGY 4: Parsing structured data files...")
-            parsed_data = self.parse_structured_files(api_key)
-            all_data.extend(parsed_data)
-            print(f"Parsed files found: {len(parsed_data)} additional records")
-            print(f"🎯 Total raw data from files: {len(all_data)} records")
-            if not all_data:
-                return [], f"⚠️ No bulk data files accessible with this API key."
-            # Comprehensive deduplication
-            unique_data = self.comprehensive_deduplication(all_data)
-            print(f"🏆 FINAL RESULT: {len(unique_data)} unique monitoring stations from bulk files")
-            return unique_data, f"🎯 BULK FILE ACCESS: Found {len(unique_data)} monitoring stations from hourly data files"
-        except Exception as e:
-            print(f"File access error: {str(e)}")
-            return [], f"❌ Error accessing bulk files: {str(e)}"
-    def get_hourly_data_files(self, api_key: str) -> List[Dict]:
-        """Access hourly data files containing all monitoring sites"""
-        data = []
-        # Try various hourly data file endpoints
-        hourly_endpoints = [
-            f"{self.base_url}/files/data/",
-            f"{self.base_url}/files/hourly/",
-            f"{self.base_url}/files/",
-            f"{self.base_url}/aq/data/hourly/",
-            f"{self.base_url}/aq/files/",
-            f"{self.base_url}/data/",
-            f"{self.base_url}/hourly/",
-            # Try specific file formats
-            f"{self.base_url}/files/HourlyData.dat",
-            f"{self.base_url}/files/MonitoringSites.dat",
-            f"{self.base_url}/files/reportingarea.dat",
-            f"{self.base_url}/files/HourlyData.csv",
-            f"{self.base_url}/files/stations.csv",
-            f"{self.base_url}/files/current.csv",
-            f"{self.base_url}/files/sites.xml",
-            f"{self.base_url}/files/data.json",
-        ]
-        for endpoint in hourly_endpoints:
             try:
-                print(f"Trying: {endpoint}")
-                # Try with API key parameter
-                response = requests.get(endpoint,
-                                      params={"API_KEY": api_key, "format": "json"},
-                                      timeout=30)
-                if response.status_code == 200:
-                    print(f"✅ SUCCESS: {endpoint} returned data")
-                    # Try to parse as JSON first
-                    try:
-                        json_data = response.json()
-                        if isinstance(json_data, list):
-                            for record in json_data:
-                                record['source'] = 'hourly_file'
-                            data.extend(json_data)
-                            print(f"Parsed JSON: {len(json_data)} records")
-                            continue
-                    except:
-                        pass
-                    # Try to parse as CSV
-                    try:
-                        csv_data = self.parse_csv_response(response.text)
-                        if csv_data:
-                            data.extend(csv_data)
-                            print(f"Parsed CSV: {len(csv_data)} records")
-                            continue
-                    except:
-                        pass
-                    # Try to parse as pipe-delimited (common AirNow format)
-                    try:
-                        pipe_data = self.parse_pipe_delimited(response.text)
-                        if pipe_data:
-                            data.extend(pipe_data)
-                            print(f"Parsed pipe-delimited: {len(pipe_data)} records")
-                            continue
-                    except:
-                        pass
-                    print(f"Could not parse response format from {endpoint}")
-                else:
-                    print(f"❌ {endpoint}: HTTP {response.status_code}")
-                time.sleep(0.1)
             except Exception as e:
-                print(f"Error accessing {endpoint}: {str(e)}")
                 continue
         return data
-    def get_file_products(self, api_key: str) -> List[Dict]:
-        """Access file products mentioned in AirNow documentation"""
-        data = []
-        # File products mentioned in research and documentation
-        file_products = [
-            # From airnowtech.org - these often have bulk data
-            "https://files.airnowtech.org/airnow/today/HourlyData.dat",
-            "https://files.airnowtech.org/airnow/today/daily_summary.dat",
-            "https://files.airnowtech.org/airnow/today/reportingarea.dat",
-            "https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat",
-            "https://files.airnowtech.org/airnow/HourlyData.dat",
-            "https://files.airnowtech.org/HourlyData.dat",
-            "https://files.airnowtech.org/reportingarea.dat",
-            "https://files.airnowtech.org/airnow/today/HourlyAQObs.dat",
-            "https://files.airnowtech.org/airnow/today/HourlyAQForecast.dat",
-            # Try direct API file endpoints
-            f"{self.base_url}/files/data/HourlyData.dat",
-            f"{self.base_url}/files/HourlyData.dat",
-            f"{self.base_url}/files/today/HourlyData.dat",
-            f"{self.base_url}/files/reportingarea.dat",
-            f"{self.base_url}/files/sites.dat",
-            f"{self.base_url}/files/monitors.dat",
-        ]
-        for file_url in file_products:
-            try:
-                print(f"Trying file: {file_url}")
-                # Try with and without API key
-                for use_api_key in [True, False]:
-                    try:
-                        if use_api_key and "airnowapi.org" in file_url:
-                            params = {"API_KEY": api_key}
-                        else:
-                            params = {}
-                        response = requests.get(file_url, params=params, timeout=30)
-                        if response.status_code == 200 and response.text.strip():
-                            print(f"✅ File found: {file_url}")
-                            # Parse the file content
-                            file_data = self.parse_airnow_file_format(response.text)
-                            if file_data:
-                                for record in file_data:
-                                    record['source'] = 'file_product'
-                                data.extend(file_data)
-                                print(f"Parsed file: {len(file_data)} records")
-                                break  # Success, move to next file
-                    except Exception as e:
-                        continue
-                time.sleep(0.1)
-            except Exception as e:
-                continue
-        return data
-    def get_bulk_exports(self, api_key: str) -> List[Dict]:
-        """Try bulk export endpoints"""
         data = []
-        export_endpoints = [
-            f"{self.base_url}/aq/data/",
-            f"{self.base_url}/aq/observation/",
-            f"{self.base_url}/aq/monitoring/",
-            f"{self.base_url}/export/",
-            f"{self.base_url}/bulk/",
-            f"{self.base_url}/download/",
-            f"{self.base_url}/api/data/",
-        ]
-        for endpoint in export_endpoints:
             try:
-                params = {
-                    "format": "json",
-                    "API_KEY": api_key,
-                    "datatype": "monitoring",
-                    "export": "all"
-                }
-                response = requests.get(endpoint, params=params, timeout=20)
-                if response.status_code == 200:
-                    try:
-                        export_data = response.json()
-                        if export_data and isinstance(export_data, list):
-                            for record in export_data:
-                                record['source'] = 'bulk_export'
-                            data.extend(export_data)
-                            print(f"Bulk export: {len(export_data)} records")
-                    except:
-                        pass
-            except:
-                continue
-        return data
-    def parse_structured_files(self, api_key: str) -> List[Dict]:
-        """Try to get structured data files in various formats"""
-        data = []
-        # Try current hour data endpoint (should have all active stations)
-        try:
-            from datetime import datetime
-            import pytz
-            # Get current hour in Eastern Time (AirNow's timezone)
-            eastern = pytz.timezone('US/Eastern')
-            now = datetime.now(eastern)
-            # Try current hour endpoint
-            hour_endpoints = [
-                f"{self.base_url}/aq/observation/zipCode/current/",
-                f"{self.base_url}/aq/data/monitoring/current/",
-                f"{self.base_url}/aq/observation/latLong/current/",
-            ]
-            # Try to get data for entire country using bounding box
-            usa_bbox = {
-                "minLat": 18.0,  # Southern tip of Hawaii
-                "maxLat": 72.0,  # Northern Alaska
-                "minLon": -180.0, # Western Alaska
-                "maxLon": -65.0   # Eastern Maine
-            }
-            for endpoint in hour_endpoints:
-                try:
-                    if "zipCode" in endpoint:
-                        # Use major ZIP codes to get broad coverage
-                        major_zips = ["10001", "90210", "60601", "77001", "33101", "85001", "98101"]
-                        for zipcode in major_zips:
-                            params = {
-                                "format": "application/json",
-                                "zipCode": zipcode,
-                                "distance": 200,  # Max distance
-                                "API_KEY": api_key
-                            }
-                            response = requests.get(endpoint, params=params, timeout=15)
-                            if response.status_code == 200:
-                                zip_data = response.json()
-                                if zip_data:
-                                    data.extend(zip_data)
-                    elif "latLong" in endpoint:
-                        # Use center of US
-                        params = {
-                            "format": "application/json",
-                            "latitude": 39.8283,
-                            "longitude": -98.5795,
-                            "distance": 2000,  # Very large radius
-                            "API_KEY": api_key
-                        }
-                        response = requests.get(endpoint, params=params, timeout=15)
-                        if response.status_code == 200:
-                            center_data = response.json()
-                            if center_data:
-                                data.extend(center_data)
-                    else:
-                        # Try with bounding box if supported
-                        params = {
-                            "format": "application/json",
-                            "API_KEY": api_key,
-                            **usa_bbox
-                        }
-                        response = requests.get(endpoint, params=params, timeout=15)
-                        if response.status_code == 200:
-                            bbox_data = response.json()
-                            if bbox_data:
-                                data.extend(bbox_data)
-                except Exception as e:
-                    continue
-        except Exception as e:
-            print(f"Error in structured files: {str(e)}")
         return data
-    def parse_csv_response(self, text: str) -> List[Dict]:
-        """Parse CSV format response"""
-        import csv
-        from io import StringIO
-        try:
-            reader = csv.DictReader(StringIO(text))
-            return [row for row in reader]
-        except:
-            return []
-    def parse_pipe_delimited(self, text: str) -> List[Dict]:
-        """Parse pipe-delimited format (common in AirNow files)"""
-        lines = text.strip().split('\n')
-        if len(lines) < 2:
-            return []
-        try:
-            # First line might be headers
-            headers = lines[0].split('|')
-            data = []
-            for line in lines[1:]:
-                values = line.split('|')
-                if len(values) == len(headers):
-                    record = dict(zip(headers, values))
-                    data.append(record)
-            return data
-        except:
-            return []
-    def parse_airnow_file_format(self, text: str) -> List[Dict]:
-        """Parse standard AirNow file format"""
         lines = text.strip().split('\n')
         data = []
         for line in lines:
-            if not line.strip() or line.startswith('#'):
                 continue
-            try:
-                # Try pipe-delimited first
-                if '|' in line:
-                    parts = line.split('|')
-                elif ',' in line:
-                    parts = line.split(',')
-                elif '\t' in line:
-                    parts = line.split('\t')
-                else:
-                    continue
-                # Create record based on typical AirNow format
-                if len(parts) >= 8:
                     record = {
-                        'DateObserved': parts[0] if len(parts) > 0 else '',
-                        'HourObserved': parts[1] if len(parts) > 1 else '',
-                        'LocalTimeZone': parts[2] if len(parts) > 2 else '',
-                        'ReportingArea': parts[3] if len(parts) > 3 else '',
-                        'StateCode': parts[4] if len(parts) > 4 else '',
-                        'Latitude': float(parts[5]) if len(parts) > 5 and parts[5] else 0,
-                        'Longitude': float(parts[6]) if len(parts) > 6 and parts[6] else 0,
-                        'ParameterName': parts[7] if len(parts) > 7 else '',
-                        'AQI': int(parts[8]) if len(parts) > 8 and parts[8].isdigit() else 0,
-                        'CategoryName': parts[9] if len(parts) > 9 else '',
-                        'source': 'parsed_file'
                     }
                     data.append(record)
             except:
                 continue
         return data
-    def comprehensive_deduplication(self, data: List[Dict]) -> List[Dict]:
-        """Comprehensive deduplication preserving maximum unique stations"""
-        seen_stations = set()
         unique_data = []
-        for item in data:
-            # Create ultra-specific key to preserve different monitor types
-            station_key = (
-                round(item.get('Latitude', 0), 8),      # Very high precision
-                round(item.get('Longitude', 0), 8),
-                item.get('ParameterName', ''),
-                item.get('SiteName', ''),
-                item.get('AgencyName', ''),
-                item.get('MonitorType', ''),
-                item.get('ReportingArea', ''),
-                item.get('StateCode', ''),
-                item.get('CountyCode', '')
-            )
-            if station_key not in seen_stations:
-                seen_stations.add(station_key)
-                unique_data.append(item)
         return unique_data
-    def create_map(self, data: List[Dict]) -> str:
-        """Create an interactive map with air quality data"""
-        if not data:
-            # Create a basic US map if no data
-            m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)
-            folium.Marker(
-                [39.8283, -98.5795],
-                popup="No data available. Please check your API key.",
-                icon=folium.Icon(color='red', icon='info-sign')
-            ).add_to(m)
-            return m._repr_html_()
-        # Calculate center point of all data
-        lats = [item['Latitude'] for item in data if 'Latitude' in item]
-        lons = [item['Longitude'] for item in data if 'Longitude' in item]
-        if lats and lons:
-            center_lat = sum(lats) / len(lats)
-            center_lon = sum(lons) / len(lons)
-        else:
-            center_lat, center_lon = 39.8283, -98.5795  # Center of US
-        # Create map
-        m = folium.Map(location=[center_lat, center_lon], zoom_start=4)
-        # Add markers for each monitoring location
-        for item in data:
-            try:
-                lat = item.get('Latitude')
-                lon = item.get('Longitude')
-                aqi = item.get('AQI', 0)
-                parameter = item.get('ParameterName', 'Unknown')
-                area = item.get('ReportingArea', 'Unknown Area')
-                state = item.get('StateCode', 'Unknown')
-                category = item.get('Category', {}).get('Name', self.get_aqi_category(aqi))
-                if lat is None or lon is None:
-                    continue
-                # Get color based on AQI category
-                color = self.get_aqi_color(category)
-                # Create popup content
-                popup_content = f"""
-                <div style="width: 200px;">
-                    <h4>{area}, {state}</h4>
-                    <p><b>AQI:</b> {aqi} ({category})</p>
-                    <p><b>Parameter:</b> {parameter}</p>
-                    <p><b>Location:</b> {lat:.3f}, {lon:.3f}</p>
-                    <p><b>Last Updated:</b> {item.get('DateObserved', 'Unknown')} {item.get('HourObserved', '')}:00</p>
-                </div>
-                """
-                # Determine marker color based on AQI
-                if aqi <= 50:
-                    marker_color = 'green'
-                elif aqi <= 100:
-                    marker_color = 'orange'  # Changed from 'yellow' to 'orange'
-                elif aqi <= 150:
-                    marker_color = 'orange'
-                elif aqi <= 200:
-                    marker_color = 'red'
-                elif aqi <= 300:
-                    marker_color = 'purple'
-                else:
-                    marker_color = 'darkred'
-                # Add marker
-                folium.Marker(
-                    [lat, lon],
-                    popup=folium.Popup(popup_content, max_width=250),
-                    tooltip=f"{area}: AQI {aqi}",
-                    icon=folium.Icon(color=marker_color, icon='cloud')
-                ).add_to(m)
-            except Exception as e:
-                continue  # Skip problematic markers
-        # Add legend
-        legend_html = """
-        <div style="position: fixed;
-                    bottom: 50px; left: 50px; width: 150px; height: 180px;
-                    background-color: white; border:2px solid grey; z-index:9999;
-                    font-size:14px; padding: 10px">
-        <h4>AQI Legend</h4>
-        <p><i class="fa fa-circle" style="color:green"></i> Good (0-50)</p>
-        <p><i class="fa fa-circle" style="color:orange"></i> Moderate (51-100)</p>
-        <p><i class="fa fa-circle" style="color:orange"></i> Unhealthy for Sensitive (101-150)</p>
-        <p><i class="fa fa-circle" style="color:red"></i> Unhealthy (151-200)</p>
-        <p><i class="fa fa-circle" style="color:purple"></i> Very Unhealthy (201-300)</p>
-        <p><i class="fa fa-circle" style="color:darkred"></i> Hazardous (301+)</p>
-        </div>
-        """
-        m.get_root().html.add_child(folium.Element(legend_html))
-        return m._repr_html_()
-    def create_data_table(self, data: List[Dict]) -> pd.DataFrame:
-        """Create a data table from the air quality data"""
-        if not data:
-            return pd.DataFrame()
-        # Extract relevant columns
-        table_data = []
-        for item in data:
-            table_data.append({
-                'Reporting Area': item.get('ReportingArea', 'Unknown'),
-                'State': item.get('StateCode', 'Unknown'),
-                'AQI': item.get('AQI', 0),
-                'Category': item.get('Category', {}).get('Name', self.get_aqi_category(item.get('AQI', 0))),
-                'Parameter': item.get('ParameterName', 'Unknown'),
-                'Date': item.get('DateObserved', 'Unknown'),
-                'Hour': item.get('HourObserved', 'Unknown'),
-                'Latitude': item.get('Latitude', 'Unknown'),
-                'Longitude': item.get('Longitude', 'Unknown')
-            })
-        df = pd.DataFrame(table_data)
-        return df.sort_values('AQI', ascending=False)
-# Initialize the mapper
-mapper = AirQualityMapper()
-# Check environment variable on startup
-env_api_key = os.getenv('AIRNOW_API_KEY')
-print(f"Environment variable AIRNOW_API_KEY: {'SET' if env_api_key else 'NOT SET'}")
-if env_api_key:
-    print(f"API key starts with: {env_api_key[:8]}...")
-def update_map(api_key: str):
-    """Update the map with fresh air quality data"""
-    # Check for environment variable first, then use provided key
-    env_api_key = os.getenv('AIRNOW_API_KEY')
-    if env_api_key:
-        api_key = env_api_key
-    if not api_key.strip():
-        return "Please enter your AirNow API key above or set AIRNOW_API_KEY environment variable.", pd.DataFrame(), "❌ No API key provided"
-    # Fetch data
-    data, status = mapper.fetch_airnow_data(api_key)
-    # Create map
-    map_html = mapper.create_map(data)
-    # Create data table
-    df = mapper.create_data_table(data)
-    return map_html, df, status
-# Create Gradio interface
-with gr.Blocks(title="AirNow Air Quality Sensor Map", theme=gr.themes.Soft()) as demo:
-    # Check if API key is set as environment variable
-    env_api_key = os.getenv('AIRNOW_API_KEY')
-    api_key_status = "✅ API key loaded from environment variable" if env_api_key else "⚠️ No environment variable set"
-    gr.Markdown(
-        f"""
-        # 🌬️ AirNow Air Quality Sensor Map
-        **API Key Status**: {api_key_status}
-        This interactive map displays real-time air quality data from EPA's AirNow network of over 2,000 monitoring stations across the United States.
-        ## How to use:
-        1. **API Key**: {"API key is already configured via environment variable" if env_api_key else "Enter your API key below or set AIRNOW_API_KEY environment variable"}
-        2. **Click "Load Air Quality Data"** to fetch current readings from 500+ monitoring stations nationwide
-        3. **Explore the map**: Click on markers to see detailed information about each monitoring station
-        ## Enhanced Coverage:
-        - **Comprehensive Grid Search**: Covers 200+ major cities and metropolitan areas
-        - **Maximum Radius**: 200-mile search radius for complete regional coverage
-        - **Strategic Targeting**: Includes airports, universities, and industrial areas with monitors
-        - **Minimal Deduplication**: Preserves multiple sensors per location for maximum data
-        - **Lightning Fast**: 0.05-second delays for rapid data collection
-        **⚠️ Note**: This data is preliminary and should not be used for regulatory decisions. For official data, visit [EPA's AirData](https://www.epa.gov/outdoor-air-quality-data).
-        """
-    )
-    with gr.Row():
-        with gr.Column(scale=3):
-            api_key_input = gr.Textbox(
-                label="AirNow API Key (Optional if environment variable is set)",
-                placeholder="Enter your AirNow API key here..." if not env_api_key else "Using environment variable AIRNOW_API_KEY",
-                type="password",
-                info="Get your free API key at docs.airnowapi.org" if not env_api_key else "Environment variable is being used",
-                value="" if not env_api_key else "Environment variable configured",
-                interactive=not bool(env_api_key)
-            )
-        with gr.Column(scale=1):
-            load_button = gr.Button("Load Air Quality Data", variant="primary", size="lg")
-    status_text = gr.Markdown("Click 'Load Air Quality Data' to begin." if env_api_key else "Enter your API key and click 'Load Air Quality Data' to begin.")
-    with gr.Tabs():
-        with gr.TabItem("Interactive Map"):
-            map_output = gr.HTML(label="Air Quality Map")
-        with gr.TabItem("Data Table"):
-            data_table = gr.Dataframe(
-                label="Air Quality Monitoring Stations",
-                interactive=False
-            )
-    gr.Markdown(
-        """
-        ## AQI Health Guidelines:
-        - **Good (0-50)**: Air quality is satisfactory for everyone
-        - **Moderate (51-100)**: Air quality is acceptable for most people
-        - **Unhealthy for Sensitive Groups (101-150)**: Members of sensitive groups may experience health effects
-        - **Unhealthy (151-200)**: Everyone may begin to experience health effects
-        - **Very Unhealthy (201-300)**: Health warnings of emergency conditions
-        - **Hazardous (301+)**: Health alert - everyone may experience serious health effects
-        ## Data Sources:
-        - **AirNow API**: Real-time air quality data from EPA's monitoring network
-        - **Monitoring Agencies**: 120+ local, state, tribal, and federal government agencies
-        - **Update Frequency**: Hourly observations, daily forecasts
-        ## Links:
-        - [AirNow.gov](https://www.airnow.gov) - Official air quality information
-        - [AirNow API Documentation](https://docs.airnowapi.org/) - API documentation and registration
-        - [EPA AirData](https://www.epa.gov/outdoor-air-quality-data) - Official regulatory air quality data
-        """
-    )
-    # Set up event handler
-    load_button.click(
-        fn=update_map,
-        inputs=[api_key_input],
-        outputs=[map_output, data_table, status_text]
-    )
-# Launch the app
-if __name__ == "__main__":
-    demo.launch()

 import requests
 import pandas as pd
+from datetime import datetime, timedelta
+import pytz
+from typing import List, Dict
+import time
+class AirNowBulkFetcher:
+    """Simplified class to fetch bulk AirNow data from the actual working endpoints"""
     def __init__(self):
+        self.base_url = "https://files.airnowtech.org"
+    def get_current_hourly_data(self) -> List[Dict]:
+        """Get the most recent hourly data file with ALL monitoring stations"""
+        # Get current GMT time (AirNow uses GMT)
+        gmt_now = datetime.now(pytz.UTC)
+        # Try current hour and previous few hours
+        data = []
+        for hour_offset in range(0, 6):  # Try current and past 5 hours
             try:
+                target_time = gmt_now - timedelta(hours=hour_offset)
+                filename = f"HourlyData_{target_time.strftime('%Y%m%d%H')}.dat"
+                # Try the current/today directory first
+                url = f"{self.base_url}/airnow/today/{filename}"
+                print(f"🔍 Trying: {url}")
+                response = requests.get(url, timeout=30)
+                if response.status_code == 200 and response.text.strip():
+                    print(f"✅ SUCCESS: Found data file with {len(response.text.splitlines())} lines")
+                    # Parse the pipe-delimited data
+                    file_data = self.parse_hourly_data_file(response.text)
+                    data.extend(file_data)
+                    if file_data:
+                        print(f"📊 Parsed {len(file_data)} station records from {filename}")
+                        break  # Got data, stop trying
+                # Also try the date-specific directory
+                date_dir = target_time.strftime('%Y/%Y%m%d')
+                url_dated = f"{self.base_url}/airnow/{date_dir}/{filename}"
+                print(f"🔍 Trying: {url_dated}")
+                response = requests.get(url_dated, timeout=30)
+                if response.status_code == 200 and response.text.strip():
+                    print(f"✅ SUCCESS: Found data in dated directory")
+                    file_data = self.parse_hourly_data_file(response.text)
+                    data.extend(file_data)
+                    if file_data:
+                        print(f"📊 Parsed {len(file_data)} station records from dated directory")
+                        break
             except Exception as e:
+                print(f"❌ Error trying hour {hour_offset}: {str(e)}")
                 continue
+            time.sleep(0.1)  # Be nice to the server
         return data
def get_reporting_areas_data(self) -> List[Dict]:
    """Download today's ``reportingarea.dat`` file and parse it.

    Returns:
        The records produced by ``parse_reporting_areas_file``, or an empty
        list when the request raises, the server does not answer with
        status 200, or the response body is blank.
    """
    parsed: List[Dict] = []
    try:
        areas_url = f"{self.base_url}/airnow/today/reportingarea.dat"
        print(f"🔍 Trying reporting areas: {areas_url}")
        reply = requests.get(areas_url, timeout=30)
        # Only a 200 answer with a non-blank body is worth parsing
        has_payload = reply.status_code == 200 and reply.text.strip()
        if has_payload:
            print(f"✅ Found reporting areas file")
            parsed = self.parse_reporting_areas_file(reply.text)
    except Exception as exc:
        # Best effort: report the problem and fall back to "no data"
        print(f"❌ Error getting reporting areas: {str(exc)}")
    return parsed
def parse_hourly_data_file(self, text: str) -> List[Dict]:
    """Parse a pipe-delimited AirNow hourly data file into record dicts.

    Each non-blank line is split on ``|``. Lines with fewer than nine
    fields are ignored. The first nine fields are read as: date, hour,
    AQS ID, site name, GMT offset, parameter name, reporting units, value
    and data source.

    Args:
        text: Full text of the hourly data file (LF or CRLF line endings).

    Returns:
        One dict per parsed line. ``GMTOffset`` and ``Value`` fall back to
        0 when the field is not a valid number. ``Latitude``/``Longitude``
        are placeholders derived from the GMT offset, not real station
        coordinates. ``AQI`` comes from ``self.calculate_aqi``.
    """
    import math  # local import: only needed for the finiteness check below

    def to_int(raw: str) -> int:
        """Return ``raw`` as an int, or 0 when it is not an integer."""
        try:
            return int(raw.strip())
        except ValueError:
            return 0

    def to_float(raw: str) -> float:
        """Return ``raw`` as a finite float, or 0 when it is not one."""
        try:
            number = float(raw.strip())
        except ValueError:
            return 0
        # "nan"/"inf" parse successfully but are useless as measurements
        return number if math.isfinite(number) else 0

    # splitlines() handles CRLF files, so no stray '\r' ends up in the
    # last field of each line
    lines = text.strip().splitlines()
    data = []
    print(f"🔧 Parsing {len(lines)} lines...")
    for line_num, line in enumerate(lines):
        if not line.strip():
            continue
        # Split by pipe delimiter as per official format
        fields = line.split('|')
        if len(fields) < 9:  # Minimum required fields
            continue
        try:
            gmt_offset = to_int(fields[4])
            value = to_float(fields[7])
            record = {
                'DateObserved': fields[0],           # Valid date
                'HourObserved': fields[1],           # Valid time
                'AQSID': fields[2],                  # AQS ID
                'SiteName': fields[3],               # Site name
                'GMTOffset': gmt_offset,
                'ParameterName': fields[5],          # Parameter name
                'ReportingUnits': fields[6],         # Units
                'Value': value,
                'DataSource': fields[8],
                'source': 'hourly_bulk_file',
                # Very rough placeholder position: the file carries no
                # coordinates, so latitude is fixed at the centre of the
                # US and longitude is guessed from the timezone offset.
                # A separate site lookup is needed for exact coordinates.
                'Latitude': 39.0,
                'Longitude': -98.0 + (gmt_offset * 15),
            }
            # Calculate AQI if it's a criteria pollutant
            record['AQI'] = self.calculate_aqi(record['ParameterName'], value)
            data.append(record)
        except Exception as e:
            # Keep going: one bad line must not abort the whole file
            print(f"❌ Error parsing line {line_num}: {str(e)}")
            continue
    print(f"✅ Successfully parsed {len(data)} records")
    return data
def parse_reporting_areas_file(self, text: str) -> List[Dict]:
    """Parse a pipe-delimited reporting-areas file into record dicts.

    Each non-blank line is split on ``|``. A line needs at least nine
    fields because the AQI is read from index 8; a tenth field, when
    present, is used as the category name.

    NOTE(review): the column positions used here (area at 3, state at 4,
    latitude at 5, longitude at 6, parameter at 7, AQI at 8) should be
    confirmed against the published reportingarea.dat layout.

    Args:
        text: Full text of the file (LF or CRLF line endings).

    Returns:
        One dict per usable line. Lines that are too short or whose
        latitude/longitude are not numeric are skipped. A non-numeric AQI
        becomes 0, as do empty coordinates.
    """
    data = []
    # splitlines() handles CRLF files, so the last field of each line is
    # not polluted with a trailing '\r'
    for line in text.strip().splitlines():
        if not line.strip():
            continue
        fields = line.split('|')
        if len(fields) < 9:
            continue
        try:
            record = {
                'DateObserved': fields[0],
                'HourObserved': fields[1],
                'ReportingArea': fields[3],
                'StateCode': fields[4],
                'Latitude': float(fields[5]) if fields[5] else 0,
                'Longitude': float(fields[6]) if fields[6] else 0,
                'ParameterName': fields[7],
                'AQI': int(fields[8]) if fields[8].isdigit() else 0,
                'CategoryName': fields[9] if len(fields) > 9 else '',
                'source': 'reporting_areas_file'
            }
        except ValueError:
            # Non-numeric coordinates: the record cannot be placed, skip it
            continue
        data.append(record)
    return data
def calculate_aqi(self, parameter: str, value: float) -> int:
    """Return a simplified AQI for OZONE, PM2.5 or PM10 readings.

    The concentration is mapped piecewise-linearly onto the AQI scale and
    truncated to an int. Everything above the last listed bound shares one
    open-ended segment that starts at 201 and is capped at 400. Any other
    parameter, a negative value, or an ozone value of zero or less yields 0.
    """
    # (parameter, zero_allowed, segments, (top_start, top_span))
    # segment = (upper_bound, aqi_at_start, aqi_rise, conc_run, conc_start)
    scales = (
        (
            'OZONE',
            False,  # simplified ozone scale (8-hour average basis)
            (
                (54, 0, 50, 54, 0),
                (70, 51, 49, 16, 54),
                (85, 101, 49, 15, 70),
                (105, 151, 49, 20, 85),
            ),
            (105, 95),
        ),
        (
            'PM2.5',
            True,
            (
                (12.0, 0, 50, 12, 0),
                (35.4, 51, 49, 23.4, 12),
                (55.4, 101, 49, 20, 35.4),
                (150.4, 151, 49, 95, 55.4),
            ),
            (150.4, 149.6),
        ),
        (
            'PM10',
            True,
            (
                (54, 0, 50, 54, 0),
                (154, 51, 49, 100, 54),
                (254, 101, 49, 100, 154),
                (354, 151, 49, 100, 254),
            ),
            (354, 146),
        ),
    )

    for name, zero_allowed, segments, (top_start, top_span) in scales:
        if parameter != name:
            continue
        in_range = value >= 0 if zero_allowed else value > 0
        if not in_range:
            break
        for upper, aqi_start, rise, run, conc_start in segments:
            if value <= upper:
                return int(aqi_start + (rise / run) * (value - conc_start))
        # Open-ended top segment: the overshoot is clamped to its span
        return int(201 + (199 / top_span) * min(value - top_start, top_span))

    return 0  # Default for non-AQI parameters
def get_all_stations(self) -> List[Dict]:
    """Fetch hourly and reporting-area records and drop duplicates.

    Records from ``get_current_hourly_data`` come first, followed by those
    from ``get_reporting_areas_data``. The first occurrence of each key is
    kept and the original order is preserved.

    Returns:
        The de-duplicated list of record dicts.
    """
    print("🚀 Fetching AirNow bulk station data...")
    all_data = []
    # Get hourly data (contains ALL active monitoring stations)
    print("\n📊 Getting hourly monitoring data...")
    all_data.extend(self.get_current_hourly_data())
    # Get reporting areas data
    print("\n🌍 Getting reporting areas data...")
    all_data.extend(self.get_reporting_areas_data())
    # Deduplicate on the identifying fields of both record kinds.
    # Reporting-area records carry no AQSID/SiteName, so the area name and
    # state code must be part of the key; otherwise every area with the
    # same parameter would collapse into a single record.
    print(f"\n🔧 Deduplicating {len(all_data)} records...")
    identity_fields = ('AQSID', 'SiteName', 'ReportingArea', 'StateCode', 'ParameterName')
    unique_data = []
    seen = set()
    for record in all_data:
        key = tuple(record.get(field, '') for field in identity_fields)
        if key not in seen:
            seen.add(key)
            unique_data.append(record)
    print(f"✅ Final result: {len(unique_data)} unique monitoring stations")
    return unique_data
# Usage example: fetch every available record, print a short summary and
# save the result as CSV.
if __name__ == "__main__":
    fetcher = AirNowBulkFetcher()
    # Get all stations data
    stations_data = fetcher.get_all_stations()
    if stations_data:
        # Convert to DataFrame for easy viewing
        df = pd.DataFrame(stations_data)
        # Reporting-area records have no SiteName/Value/DataSource, so those
        # columns are missing when only that source returned data. Guard
        # every optional column instead of failing with a KeyError.
        site_count = df['SiteName'].nunique() if 'SiteName' in df.columns else 0
        wanted_columns = ['SiteName', 'ParameterName', 'Value', 'AQI', 'DataSource']
        sample_columns = [col for col in wanted_columns if col in df.columns]
        print(f"\n📈 Data Summary:")
        print(f"Total stations: {len(df)}")
        print(f"Parameters monitored: {df['ParameterName'].nunique()}")
        print(f"Unique sites: {site_count}")
        print(f"\nParameter breakdown:")
        print(df['ParameterName'].value_counts().head(10))
        print(f"\nSample records:")
        print(df[sample_columns].head(10))
        # Save to CSV
        df.to_csv('airnow_stations_data.csv', index=False)
        print(f"\n💾 Data saved to 'airnow_stations_data.csv'")
    else:
        print("❌ No data retrieved. Check the AirNow file servers.")