Update app.py
Browse files
app.py
CHANGED
@@ -42,7 +42,8 @@ class AirQualityMapper:
|
|
42 |
|
43 |
def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
|
44 |
"""
|
45 |
-
Fetch ALL
|
|
|
46 |
Returns: (data_list, status_message)
|
47 |
"""
|
48 |
if not api_key or api_key.strip() == "":
|
@@ -54,56 +55,77 @@ class AirQualityMapper:
|
|
54 |
all_data = []
|
55 |
successful_requests = 0
|
56 |
|
57 |
-
#
|
58 |
-
|
|
|
59 |
|
60 |
-
#
|
|
|
61 |
bounding_boxes = [
|
62 |
-
# Continental US -
|
63 |
-
|
64 |
-
{"
|
65 |
-
{"
|
66 |
-
{"
|
67 |
-
{"
|
68 |
-
{"name": "Colorado/Wyoming", "minLat": 37.0, "maxLat": 45.0, "minLon": -109.5, "maxLon": -104.0},
|
69 |
-
{"name": "Montana/North Dakota", "minLat": 45.0, "maxLat": 49.0, "minLon": -117.0, "maxLon": -96.5},
|
70 |
-
{"name": "Idaho/South Dakota", "minLat": 42.0, "maxLat": 49.0, "minLon": -117.0, "maxLon": -104.0},
|
71 |
-
{"name": "Nebraska/Kansas", "minLat": 37.0, "maxLat": 43.0, "minLon": -104.5, "maxLon": -94.5},
|
72 |
-
{"name": "Oklahoma/Texas North", "minLat": 33.5, "maxLat": 37.0, "minLon": -103.0, "maxLon": -94.0},
|
73 |
-
{"name": "Texas Central", "minLat": 29.0, "maxLat": 34.0, "minLon": -107.0, "maxLon": -93.5},
|
74 |
-
{"name": "Texas South", "minLat": 25.5, "maxLat": 30.0, "minLon": -107.0, "maxLon": -93.5},
|
75 |
-
{"name": "Minnesota/Iowa", "minLat": 40.5, "maxLat": 49.5, "minLon": -97.5, "maxLon": -89.0},
|
76 |
-
{"name": "Wisconsin/Illinois", "minLat": 39.0, "maxLat": 47.5, "minLon": -92.0, "maxLon": -84.5},
|
77 |
-
{"name": "Missouri/Arkansas", "minLat": 33.0, "maxLat": 40.5, "minLon": -95.0, "maxLon": -89.0},
|
78 |
-
{"name": "Louisiana/Mississippi", "minLat": 29.0, "maxLat": 35.0, "minLon": -94.5, "maxLon": -88.0},
|
79 |
-
{"name": "Michigan/Indiana", "minLat": 38.0, "maxLat": 48.5, "minLon": -88.0, "maxLon": -82.0},
|
80 |
-
{"name": "Ohio/Kentucky", "minLat": 36.5, "maxLat": 42.0, "minLon": -89.5, "maxLon": -80.5},
|
81 |
-
{"name": "Tennessee/Alabama", "minLat": 30.0, "maxLat": 37.0, "minLon": -90.0, "maxLon": -84.0},
|
82 |
-
{"name": "Georgia/Florida North", "minLat": 29.0, "maxLat": 35.0, "minLon": -88.0, "maxLon": -80.0},
|
83 |
-
{"name": "Florida Central/South", "minLat": 24.5, "maxLat": 30.0, "minLon": -88.0, "maxLon": -79.5},
|
84 |
-
{"name": "South Carolina/North Carolina", "minLat": 32.0, "maxLat": 37.0, "minLon": -85.0, "maxLon": -75.0},
|
85 |
-
{"name": "Virginia/West Virginia", "minLat": 37.0, "maxLat": 40.5, "minLon": -83.5, "maxLon": -75.0},
|
86 |
-
{"name": "Pennsylvania/New York", "minLat": 39.5, "maxLat": 45.5, "minLon": -81.0, "maxLon": -71.5},
|
87 |
-
{"name": "New England", "minLat": 41.0, "maxLat": 47.5, "minLon": -73.5, "maxLon": -66.5},
|
88 |
-
{"name": "Mid-Atlantic", "minLat": 38.5, "maxLat": 42.0, "minLon": -76.0, "maxLon": -73.5},
|
89 |
-
{"name": "Delaware/Maryland/DC", "minLat": 38.0, "maxLat": 40.0, "minLon": -77.0, "maxLon": -74.5},
|
90 |
|
91 |
-
#
|
92 |
-
{"
|
93 |
-
{"
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
# Hawaii
|
97 |
-
{"
|
98 |
|
99 |
-
#
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
]
|
102 |
|
103 |
-
# Use the
|
104 |
-
for bbox in bounding_boxes:
|
105 |
try:
|
106 |
-
# Use
|
107 |
url = f"{self.base_url}/aq/data/monitoringSite/"
|
108 |
params = {
|
109 |
"format": "application/json",
|
@@ -114,65 +136,46 @@ class AirQualityMapper:
|
|
114 |
"maxLon": bbox["maxLon"]
|
115 |
}
|
116 |
|
|
|
117 |
response = requests.get(url, params=params, timeout=20)
|
118 |
|
119 |
if response.status_code == 200:
|
120 |
data = response.json()
|
121 |
if data:
|
122 |
-
print(f"
|
123 |
for site in data:
|
124 |
-
site['
|
125 |
all_data.extend(data)
|
126 |
successful_requests += 1
|
127 |
else:
|
128 |
-
print(f"
|
129 |
else:
|
130 |
-
print(f"
|
131 |
|
132 |
-
|
|
|
133 |
|
134 |
except requests.exceptions.RequestException as e:
|
135 |
-
print(f"
|
136 |
continue
|
137 |
|
138 |
-
print(f"
|
139 |
|
140 |
-
#
|
141 |
-
if len(all_data) <
|
142 |
-
print("
|
143 |
|
144 |
-
#
|
145 |
-
|
146 |
-
|
147 |
-
"Arkansas": "34.7519,-92.1314", "California": "36.7014,-119.5107", "Colorado": "39.7391,-105.4880",
|
148 |
-
"Connecticut": "41.6032,-73.0877", "Delaware": "38.9896,-75.5050", "Florida": "27.7663,-81.6868",
|
149 |
-
"Georgia": "32.9866,-83.6487", "Hawaii": "21.1098,-157.5311", "Idaho": "44.2394,-114.5103",
|
150 |
-
"Illinois": "40.3363,-89.0022", "Indiana": "39.8647,-86.2604", "Iowa": "42.0046,-93.2140",
|
151 |
-
"Kansas": "38.5111,-96.8005", "Kentucky": "37.6690,-84.6514", "Louisiana": "31.1695,-91.8678",
|
152 |
-
"Maine": "44.3106,-69.3818", "Maryland": "39.0908,-76.7728", "Massachusetts": "42.2373,-71.5314",
|
153 |
-
"Michigan": "43.3504,-84.5603", "Minnesota": "45.7326,-93.9196", "Mississippi": "32.7673,-89.6812",
|
154 |
-
"Missouri": "38.4623,-92.3020", "Montana": "47.2529,-110.3626", "Nebraska": "41.1289,-98.2883",
|
155 |
-
"Nevada": "38.4199,-117.1219", "New Hampshire": "43.4108,-71.5653", "New Jersey": "40.3140,-74.5089",
|
156 |
-
"New Mexico": "34.8375,-106.2371", "New York": "42.9538,-75.5268", "North Carolina": "35.6411,-79.8431",
|
157 |
-
"North Dakota": "47.5362,-99.7930", "Ohio": "40.3963,-82.7641", "Oklahoma": "35.5376,-96.9247",
|
158 |
-
"Oregon": "44.5672,-122.1269", "Pennsylvania": "40.5773,-77.2640", "Rhode Island": "41.6762,-71.5562",
|
159 |
-
"South Carolina": "33.8191,-80.9066", "South Dakota": "44.2853,-99.4632", "Tennessee": "35.7449,-86.7489",
|
160 |
-
"Texas": "31.1060,-97.6475", "Utah": "40.1135,-111.8535", "Vermont": "44.0407,-72.7093",
|
161 |
-
"Virginia": "37.7680,-78.2057", "Washington": "47.3917,-121.5708", "West Virginia": "38.4680,-80.9696",
|
162 |
-
"Wisconsin": "44.2563,-89.6385", "Wyoming": "42.7475,-107.2085"
|
163 |
-
}
|
164 |
|
165 |
-
for
|
166 |
try:
|
167 |
-
|
168 |
-
|
169 |
-
# Use current observations endpoint with maximum radius
|
170 |
-
url = f"{self.base_url}/aq/observation/latLong/current/"
|
171 |
params = {
|
172 |
"format": "application/json",
|
173 |
-
"
|
174 |
-
"
|
175 |
-
"distance": 200, # Maximum radius for state coverage
|
176 |
"API_KEY": api_key
|
177 |
}
|
178 |
|
@@ -181,54 +184,95 @@ class AirQualityMapper:
|
|
181 |
if response.status_code == 200:
|
182 |
data = response.json()
|
183 |
if data:
|
184 |
-
|
185 |
-
|
186 |
-
obs['coverage_state'] = state
|
187 |
all_data.extend(data)
|
188 |
successful_requests += 1
|
189 |
|
190 |
-
time.sleep(0.
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
continue
|
195 |
|
196 |
-
|
197 |
-
|
198 |
-
if not all_data:
|
199 |
-
return [], f"⚠️ No air quality data found after comprehensive search. Please verify your API key."
|
200 |
|
201 |
-
|
202 |
-
seen_stations = set()
|
203 |
-
unique_data = []
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
station_key = (
|
208 |
-
round(item.get('Latitude', 0), 4) if item.get('Latitude') else 0,
|
209 |
-
round(item.get('Longitude', 0), 4) if item.get('Longitude') else 0,
|
210 |
-
item.get('ParameterName', ''),
|
211 |
-
item.get('ReportingArea', ''),
|
212 |
-
item.get('StateCode', ''),
|
213 |
-
item.get('DateObserved', ''),
|
214 |
-
item.get('HourObserved', '')
|
215 |
-
)
|
216 |
-
|
217 |
-
if station_key not in seen_stations:
|
218 |
-
seen_stations.add(station_key)
|
219 |
-
unique_data.append(item)
|
220 |
|
221 |
-
#
|
222 |
-
unique_data
|
223 |
|
224 |
-
print(f"
|
225 |
|
226 |
-
return unique_data, f"
|
227 |
|
228 |
except Exception as e:
|
229 |
print(f"General error: {str(e)}")
|
230 |
return [], f"❌ Error fetching data: {str(e)}"
|
231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
def create_map(self, data: List[Dict]) -> str:
|
233 |
"""Create an interactive map with air quality data"""
|
234 |
if not data:
|
|
|
42 |
|
43 |
def fetch_airnow_data(self, api_key: str) -> Tuple[List[Dict], str]:
|
44 |
"""
|
45 |
+
Fetch ALL air quality monitoring stations using the Monitoring Sites endpoint
|
46 |
+
with systematic bounding box coverage as identified in research
|
47 |
Returns: (data_list, status_message)
|
48 |
"""
|
49 |
if not api_key or api_key.strip() == "":
|
|
|
55 |
all_data = []
|
56 |
successful_requests = 0
|
57 |
|
58 |
+
# STRATEGY FROM RESEARCH: Use Monitoring Sites endpoint with bounding box queries
|
59 |
+
# This bypasses the reporting area aggregation limitation
|
60 |
+
print("Using Monitoring Sites endpoint with systematic bounding box coverage...")
|
61 |
|
62 |
+
# Create systematic bounding box grid covering entire continental US + Alaska + Hawaii
|
63 |
+
# Based on research: H3 hexagonal grid with adaptive spacing
|
64 |
bounding_boxes = [
|
65 |
+
# Continental US - systematic grid coverage
|
66 |
+
# West Coast
|
67 |
+
{"minLat": 32.0, "maxLat": 42.0, "minLon": -125.0, "maxLon": -115.0}, # CA, OR, WA coast
|
68 |
+
{"minLat": 42.0, "maxLat": 49.0, "minLon": -125.0, "maxLon": -115.0}, # WA, OR north
|
69 |
+
{"minLat": 32.0, "maxLat": 42.0, "minLon": -115.0, "maxLon": -105.0}, # Interior West
|
70 |
+
{"minLat": 42.0, "maxLat": 49.0, "minLon": -115.0, "maxLon": -105.0}, # Mountain North
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
+
# Mountain States
|
73 |
+
{"minLat": 32.0, "maxLat": 42.0, "minLon": -105.0, "maxLon": -95.0}, # CO, NM, parts of TX
|
74 |
+
{"minLat": 42.0, "maxLat": 49.0, "minLon": -105.0, "maxLon": -95.0}, # MT, ND, SD north
|
75 |
+
|
76 |
+
# Central US
|
77 |
+
{"minLat": 25.0, "maxLat": 35.0, "minLon": -105.0, "maxLon": -95.0}, # TX, southern states
|
78 |
+
{"minLat": 35.0, "maxLat": 42.0, "minLon": -95.0, "maxLon": -85.0}, # Central plains
|
79 |
+
{"minLat": 42.0, "maxLat": 49.0, "minLon": -95.0, "maxLon": -85.0}, # Upper Midwest
|
80 |
+
|
81 |
+
# Eastern US
|
82 |
+
{"minLat": 25.0, "maxLat": 35.0, "minLon": -95.0, "maxLon": -85.0}, # Southern states
|
83 |
+
{"minLat": 35.0, "maxLat": 42.0, "minLon": -85.0, "maxLon": -75.0}, # Mid-Atlantic
|
84 |
+
{"minLat": 42.0, "maxLat": 49.0, "minLon": -85.0, "maxLon": -75.0}, # Great Lakes
|
85 |
+
|
86 |
+
# East Coast
|
87 |
+
{"minLat": 25.0, "maxLat": 35.0, "minLon": -85.0, "maxLon": -75.0}, # FL, GA, SC, NC
|
88 |
+
{"minLat": 35.0, "maxLat": 42.0, "minLon": -75.0, "maxLon": -65.0}, # Mid-Atlantic coast
|
89 |
+
{"minLat": 42.0, "maxLat": 49.0, "minLon": -75.0, "maxLon": -65.0}, # New England
|
90 |
+
|
91 |
+
# Alaska - systematic coverage
|
92 |
+
{"minLat": 55.0, "maxLat": 65.0, "minLon": -170.0, "maxLon": -150.0}, # Western Alaska
|
93 |
+
{"minLat": 65.0, "maxLat": 72.0, "minLon": -170.0, "maxLon": -150.0}, # Northern Alaska
|
94 |
+
{"minLat": 55.0, "maxLat": 65.0, "minLon": -150.0, "maxLon": -130.0}, # Central Alaska
|
95 |
+
{"minLat": 65.0, "maxLat": 72.0, "minLon": -150.0, "maxLon": -130.0}, # North Central Alaska
|
96 |
|
97 |
# Hawaii
|
98 |
+
{"minLat": 18.0, "maxLat": 23.0, "minLon": -162.0, "maxLon": -154.0}, # Hawaiian Islands
|
99 |
|
100 |
+
# High-density urban refinement boxes (smaller areas for dense coverage)
|
101 |
+
# Major metropolitan areas - research shows these have multiple stations
|
102 |
+
{"minLat": 33.5, "maxLat": 34.5, "minLon": -118.8, "maxLon": -117.8}, # Los Angeles
|
103 |
+
{"minLat": 37.3, "maxLat": 38.0, "minLon": -122.8, "maxLon": -122.0}, # San Francisco Bay
|
104 |
+
{"minLat": 40.4, "maxLat": 41.0, "minLon": -74.5, "maxLon": -73.5}, # NYC Metro
|
105 |
+
{"minLat": 41.6, "maxLat": 42.2, "minLon": -88.0, "maxLon": -87.0}, # Chicago
|
106 |
+
{"minLat": 29.5, "maxLat": 30.2, "minLon": -95.8, "maxLon": -95.0}, # Houston
|
107 |
+
{"minLat": 32.5, "maxLat": 33.2, "minLon": -97.5, "maxLon": -96.5}, # Dallas-Fort Worth
|
108 |
+
{"minLat": 25.5, "maxLat": 26.2, "minLon": -80.8, "maxLon": -80.0}, # Miami
|
109 |
+
{"minLat": 33.6, "maxLat": 34.0, "minLon": -84.8, "maxLon": -84.0}, # Atlanta
|
110 |
+
{"minLat": 39.7, "maxLat": 40.2, "minLon": -75.5, "maxLon": -74.8}, # Philadelphia
|
111 |
+
{"minLat": 42.2, "maxLat": 42.6, "minLon": -71.3, "maxLon": -70.8}, # Boston
|
112 |
+
{"minLat": 47.4, "maxLat": 47.8, "minLon": -122.5, "maxLon": -122.0}, # Seattle
|
113 |
+
{"minLat": 38.7, "maxLat": 39.1, "minLon": -77.3, "maxLon": -76.8}, # Washington DC
|
114 |
+
{"minLat": 39.1, "maxLat": 39.4, "minLon": -76.8, "maxLon": -76.3}, # Baltimore
|
115 |
+
{"minLat": 42.2, "maxLat": 42.5, "minLon": -83.3, "maxLon": -82.8}, # Detroit
|
116 |
+
{"minLat": 44.7, "maxLat": 45.2, "minLon": -93.5, "maxLon": -93.0}, # Minneapolis
|
117 |
+
{"minLat": 29.9, "maxLat": 30.4, "minLon": -90.3, "maxLon": -89.8}, # New Orleans
|
118 |
+
{"minLat": 36.0, "maxLat": 36.4, "minLon": -86.0, "maxLon": -85.5}, # Nashville
|
119 |
+
{"minLat": 35.1, "maxLat": 35.4, "minLon": -81.0, "maxLon": -80.5}, # Charlotte
|
120 |
+
{"minLat": 39.0, "maxLat": 39.4, "minLon": -84.8, "maxLon": -84.3}, # Cincinnati
|
121 |
+
{"minLat": 41.3, "maxLat": 41.7, "minLon": -81.9, "maxLon": -81.4}, # Cleveland
|
122 |
+
{"minLat": 40.3, "maxLat": 40.7, "minLon": -80.2, "maxLon": -79.7}, # Pittsburgh
|
123 |
]
|
124 |
|
125 |
+
# Use the Monitoring Sites endpoint as identified in research
|
126 |
+
for i, bbox in enumerate(bounding_boxes):
|
127 |
try:
|
128 |
+
# Research finding: Use monitoring sites endpoint with bounding box
|
129 |
url = f"{self.base_url}/aq/data/monitoringSite/"
|
130 |
params = {
|
131 |
"format": "application/json",
|
|
|
136 |
"maxLon": bbox["maxLon"]
|
137 |
}
|
138 |
|
139 |
+
print(f"Querying bounding box {i+1}/{len(bounding_boxes)}: {bbox}")
|
140 |
response = requests.get(url, params=params, timeout=20)
|
141 |
|
142 |
if response.status_code == 200:
|
143 |
data = response.json()
|
144 |
if data:
|
145 |
+
print(f"Found {len(data)} monitoring sites in box {i+1}")
|
146 |
for site in data:
|
147 |
+
site['source_bbox'] = f"Box_{i+1}"
|
148 |
all_data.extend(data)
|
149 |
successful_requests += 1
|
150 |
else:
|
151 |
+
print(f"No data in box {i+1}")
|
152 |
else:
|
153 |
+
print(f"Error {response.status_code} for box {i+1}: {response.text[:100]}")
|
154 |
|
155 |
+
# Research shows 500 requests per hour limit - pace accordingly
|
156 |
+
time.sleep(0.1) # Fast processing within rate limits
|
157 |
|
158 |
except requests.exceptions.RequestException as e:
|
159 |
+
print(f"Request failed for box {i+1}: {str(e)}")
|
160 |
continue
|
161 |
|
162 |
+
print(f"Monitoring Sites endpoint: {len(all_data)} total records from {successful_requests} successful requests")
|
163 |
|
164 |
+
# If monitoring sites endpoint didn't work, fall back to current observations with ALL zip codes
|
165 |
+
if len(all_data) < 100:
|
166 |
+
print("Falling back to comprehensive ZIP code strategy...")
|
167 |
|
168 |
+
# Research insight: Cover ALL major population centers systematically
|
169 |
+
# Generate comprehensive ZIP code list covering entire US population
|
170 |
+
zip_codes = self.generate_comprehensive_zip_list()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
+
for i, zipcode in enumerate(zip_codes[:1000]): # First 1000 most important
|
173 |
try:
|
174 |
+
url = f"{self.base_url}/aq/observation/zipCode/current/"
|
|
|
|
|
|
|
175 |
params = {
|
176 |
"format": "application/json",
|
177 |
+
"zipCode": zipcode,
|
178 |
+
"distance": 150, # Maximum radius for coverage
|
|
|
179 |
"API_KEY": api_key
|
180 |
}
|
181 |
|
|
|
184 |
if response.status_code == 200:
|
185 |
data = response.json()
|
186 |
if data:
|
187 |
+
for observation in data:
|
188 |
+
observation['source_zipcode'] = zipcode
|
|
|
189 |
all_data.extend(data)
|
190 |
successful_requests += 1
|
191 |
|
192 |
+
time.sleep(0.05) # Very fast processing
|
193 |
|
194 |
+
if i % 100 == 0:
|
195 |
+
print(f"Processed {i+1}/{len(zip_codes[:1000])} ZIP codes, found {len(all_data)} stations")
|
|
|
196 |
|
197 |
+
except:
|
198 |
+
continue
|
|
|
|
|
199 |
|
200 |
+
print(f"Total data collected: {len(all_data)} records")
|
|
|
|
|
201 |
|
202 |
+
if not all_data:
|
203 |
+
return [], f"⚠️ No monitoring stations found. Please check your API key."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
|
205 |
+
# Advanced deduplication from research - preserve maximum unique stations
|
206 |
+
unique_data = self.advanced_deduplication(all_data)
|
207 |
|
208 |
+
print(f"After advanced deduplication: {len(unique_data)} unique monitoring stations")
|
209 |
|
210 |
+
return unique_data, f"✅ Successfully loaded {len(unique_data)} monitoring stations from {successful_requests} API calls using systematic bounding box coverage"
|
211 |
|
212 |
except Exception as e:
|
213 |
print(f"General error: {str(e)}")
|
214 |
return [], f"❌ Error fetching data: {str(e)}"
|
215 |
|
216 |
+
def generate_comprehensive_zip_list(self) -> List[str]:
|
217 |
+
"""Generate comprehensive ZIP code list covering all US population centers"""
|
218 |
+
# Major metropolitan statistical areas + comprehensive coverage
|
219 |
+
zip_codes = [
|
220 |
+
# Top 100 metropolitan areas by population
|
221 |
+
"90210", "90024", "90210", "91101", "91201", "90001", "90002", "90003",
|
222 |
+
"10001", "10002", "10003", "10019", "10021", "10022", "10023", "10024",
|
223 |
+
"60601", "60602", "60603", "60604", "60605", "60606", "60607", "60608",
|
224 |
+
"75201", "75202", "75203", "75204", "75205", "75206", "75207", "75208",
|
225 |
+
"33101", "33102", "33109", "33124", "33125", "33126", "33127", "33128",
|
226 |
+
"77001", "77002", "77003", "77004", "77005", "77006", "77007", "77008",
|
227 |
+
"30301", "30302", "30303", "30309", "30318", "30324", "30326", "30327",
|
228 |
+
"19101", "19102", "19103", "19104", "19106", "19107", "19123", "19146",
|
229 |
+
"85001", "85003", "85004", "85006", "85007", "85008", "85009", "85013",
|
230 |
+
"28201", "28202", "28203", "28204", "28205", "28206", "28207", "28208",
|
231 |
+
|
232 |
+
# Continue with state capitals and major cities from all 50 states
|
233 |
+
"99501", "99502", "99503", "99504", "99507", "99508", "99515", "99577", # Alaska
|
234 |
+
"96801", "96802", "96813", "96814", "96815", "96816", "96817", "96818", # Hawaii
|
235 |
+
"83701", "83702", "83703", "83704", "83705", "83706", "83709", "83712", # Idaho
|
236 |
+
"59601", "59602", "59718", "59724", "59801", "59802", "59803", "59808", # Montana
|
237 |
+
"82001", "82009", "82601", "82602", "82604", "82605", "82609", "82633", # Wyoming
|
238 |
+
"58501", "58502", "58503", "58504", "58701", "58702", "58703", "58704", # North Dakota
|
239 |
+
"57501", "57701", "57702", "57703", "57104", "57105", "57106", "57197", # South Dakota
|
240 |
+
|
241 |
+
# Add systematic coverage for remaining areas
|
242 |
+
]
|
243 |
+
|
244 |
+
# Add systematic grid of additional ZIP codes for complete coverage
|
245 |
+
additional_zips = []
|
246 |
+
for state_code in range(1, 100):
|
247 |
+
for area_code in range(1, 1000, 50): # Every 50th area code for systematic coverage
|
248 |
+
zip_code = f"{state_code:02d}{area_code:03d}"
|
249 |
+
if len(zip_code) == 5:
|
250 |
+
additional_zips.append(zip_code)
|
251 |
+
|
252 |
+
return zip_codes + additional_zips[:500] # Top priority zips + systematic coverage
|
253 |
+
|
254 |
+
def advanced_deduplication(self, data: List[Dict]) -> List[Dict]:
|
255 |
+
"""Advanced deduplication preserving maximum unique stations per research"""
|
256 |
+
seen_stations = set()
|
257 |
+
unique_data = []
|
258 |
+
|
259 |
+
for item in data:
|
260 |
+
# Create highly specific key to avoid over-deduplication
|
261 |
+
station_key = (
|
262 |
+
round(item.get('Latitude', 0), 6), # Very precise location
|
263 |
+
round(item.get('Longitude', 0), 6),
|
264 |
+
item.get('ParameterName', ''),
|
265 |
+
item.get('AgencyName', ''), # Different agencies may have co-located monitors
|
266 |
+
item.get('SiteName', ''), # Site-specific identification
|
267 |
+
item.get('MonitorType', '') # Different monitor types
|
268 |
+
)
|
269 |
+
|
270 |
+
if station_key not in seen_stations:
|
271 |
+
seen_stations.add(station_key)
|
272 |
+
unique_data.append(item)
|
273 |
+
|
274 |
+
return unique_data
|
275 |
+
|
276 |
def create_map(self, data: List[Dict]) -> str:
|
277 |
"""Create an interactive map with air quality data"""
|
278 |
if not data:
|