nakas committed (verified)
Commit 503ee98 · 1 Parent(s): 76560e0

Update app.py

Files changed (1): app.py (+42 -66)

app.py CHANGED
@@ -27,9 +27,9 @@ def get_raw_data(station_id):
         'Accept': 'application/json'
     }
 
-    # Calculate correct date range for last 3 days
+    # Calculate date range for last 3 days
     end_time = datetime.utcnow()
-    start_time = end_time - timedelta(hours=72) # Last 3 days
+    start_time = end_time - timedelta(hours=72)
 
     params = {
         'start': start_time.isoformat() + 'Z',
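Note: `datetime.utcnow()` returns a naive datetime, which is why the code appends a literal 'Z' to the ISO string. A timezone-aware sketch of the same 72-hour window (variable names mirror the code above):

```python
from datetime import datetime, timedelta, timezone

end_time = datetime.now(timezone.utc)        # aware alternative to utcnow()
start_time = end_time - timedelta(hours=72)  # last 3 days

params = {
    'start': start_time.isoformat(),  # already carries +00:00, no manual 'Z'
    'end': end_time.isoformat(),
}
```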
@@ -56,11 +56,10 @@ def get_raw_data(station_id):
     if len(data['features']) > 0:
         print("\nFirst observation properties:")
         print(json.dumps(data['features'][0]['properties'], indent=2))
-
-        print("\nAll available property keys:")
         keys = set()
         for feature in data['features']:
             keys.update(feature['properties'].keys())
+        print("\nAll available property keys:")
         print(sorted(list(keys)))
 
     return data
@@ -73,7 +72,7 @@ def get_raw_data(station_id):
 def scrape_snow_depth():
     """
     Uses Selenium with a headless browser to load the weather.gov timeseries page,
-    waits for the table to load, then extracts the snow depth data.
+    waits until an element containing 'Snow Depth' is present, then extracts the table data.
     Returns a DataFrame with columns "timestamp" and "snowDepth".
     """
     url = ("https://www.weather.gov/wrh/timeseries?"
@@ -86,20 +85,20 @@ def scrape_snow_depth():
     chrome_options.add_argument("--no-sandbox")
     chrome_options.add_argument("--disable-dev-shm-usage")
 
-    # Initialize Chrome using webdriver_manager so that the correct driver is installed automatically
+    # Initialize Chrome using webdriver_manager
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
 
     driver.get(url)
 
     try:
-        # Wait (up to 15 seconds) for at least one <table> element to appear in the DOM
-        WebDriverWait(driver, 15).until(
-            EC.presence_of_element_located((By.TAG_NAME, "table"))
+        # Wait up to 30 seconds for any element containing the text "Snow Depth" to appear
+        WebDriverWait(driver, 30).until(
+            EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Snow Depth')]"))
         )
     except Exception as e:
-        print("Timeout waiting for table to load:", e)
+        print("Timeout waiting for 'Snow Depth' element to appear:", e)
 
-    # Give extra time for any dynamic data to load
+    # Allow extra time for dynamic content to load
     time.sleep(5)
 
     page_source = driver.page_source
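Note: passing the driver path as the first positional argument to `webdriver.Chrome` only works on Selenium 3; Selenium 4 removed it in favor of a `Service` object. A sketch of the equivalent setup under that assumption (`chrome_options` is the options object built above):

```python
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Selenium 4 style: wrap the installed driver path in a Service
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
```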
@@ -107,26 +106,29 @@ def scrape_snow_depth():
 
     soup = BeautifulSoup(page_source, 'html.parser')
 
-    # Look through all tables for one with headers that include both "time" and "snow"
+    # Look through all tables for one that contains "Snow Depth" in its text
     tables = soup.find_all("table")
     target_table = None
     for table in tables:
-        header_row = table.find("tr")
-        if not header_row:
-            continue
-        headers = [th.get_text(strip=True) for th in header_row.find_all("th")]
-        print("Found table headers:", headers)
-        if any("time" in h.lower() for h in headers) and any("snow" in h.lower() for h in headers):
+        table_text = table.get_text()
+        print("Found table text snippet:", table_text[:100])
+        if "Snow Depth" in table_text:
             target_table = table
             break
 
     if target_table is None:
-        print("No table with required headers found.")
+        print("No table with 'Snow Depth' found in the page.")
         return pd.DataFrame()
 
-    # Determine column indices using case-insensitive matching
+    # Look for header cells in the table
     header_row = target_table.find("tr")
+    if not header_row:
+        print("No header row found in the table.")
+        return pd.DataFrame()
     headers = [th.get_text(strip=True) for th in header_row.find_all("th")]
+    print("Table headers found:", headers)
+
+    # Identify column indices (using case-insensitive match)
     time_index = None
     snow_index = None
     for i, header in enumerate(headers):
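Aside: once the page source is rendered, pandas can locate the same table directly; a sketch assuming `lxml` (or another parser backend) is installed:

```python
import pandas as pd

try:
    # `match` selects only tables whose text contains "Snow Depth"
    raw = pd.read_html(page_source, match="Snow Depth")[0]
except ValueError:
    raw = None  # pandas raises ValueError when no table matches
```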
@@ -135,10 +137,10 @@ def scrape_snow_depth():
         if "snow" in header.lower():
             snow_index = i
     if time_index is None or snow_index is None:
-        print("Required columns not found in the table headers.")
+        print("Required columns ('Time' and 'Snow Depth') not found in the table headers.")
         return pd.DataFrame()
 
-    # Extract data rows (skipping the header row)
+    # Extract rows (skip header)
     data = []
     rows = target_table.find_all("tr")[1:]
     for row in rows:
@@ -150,18 +152,19 @@ def scrape_snow_depth():
             data.append((time_text, snow_text))
 
     df = pd.DataFrame(data, columns=["Time", "Snow Depth"])
-    # Convert "Time" to datetime objects
+    # Convert the "Time" column to datetime
     df["Time"] = pd.to_datetime(df["Time"], errors="coerce")
-    # Convert "Snow Depth" to numeric values (in inches)
+    # Convert "Snow Depth" to numeric (in inches)
     df["Snow Depth"] = pd.to_numeric(df["Snow Depth"], errors="coerce")
     print("Scraped snow depth data:")
     print(df.head())
-    # Rename columns to match our API data
+
+    # Rename columns to match API data
     return df.rename(columns={"Time": "timestamp", "Snow Depth": "snowDepth"})
 
 def parse_raw_data(data):
     """
-    Parse the raw JSON data from the API into a DataFrame.
+    Parse the raw JSON API data into a DataFrame.
     """
     if not data or 'features' not in data:
         return None
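The `errors="coerce"` calls are what keep missing readings from crashing the scraper: unparseable cells become NaT/NaN instead of raising. A quick illustration:

```python
import pandas as pd

s = pd.Series(["12", "M", "14.5"])        # "M" marks a missing reading
print(pd.to_numeric(s, errors="coerce"))  # 12.0, NaN, 14.5
```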
@@ -169,8 +172,7 @@ def parse_raw_data(data):
     records = []
     for feature in data['features']:
         props = feature['properties']
-
-        # Extract any properties with "snow" in their key (if present)
+        # Extract any snow-related fields if present
         snow_fields = {k: v for k, v in props.items() if 'snow' in k.lower()}
         if snow_fields:
             print("\nFound snow-related fields:")
@@ -183,8 +185,7 @@ def parse_raw_data(data):
             'wind_speed': props.get('windSpeed', {}).get('value'),
             'wind_direction': props.get('windDirection', {}).get('value')
         }
-
-        # Add any snow-related fields to the record
+        # Add any snow fields
         for k, v in snow_fields.items():
             if isinstance(v, dict) and 'value' in v:
                 record[k] = v['value']
@@ -192,9 +193,8 @@ def parse_raw_data(data):
                 record[k] = v
 
         records.append(record)
-
-    df = pd.DataFrame(records)
 
+    df = pd.DataFrame(records)
     print("\nDataFrame columns from API:")
     print(df.columns.tolist())
     print("\nSample of raw API data:")
@@ -228,7 +228,7 @@ def create_wind_rose(ax, data, title):
     Create a wind rose subplot.
     """
     if data.empty or data['wind_direction'].isna().all() or data['wind_speed'].isna().all():
-        ax.text(0.5, 0.5, 'No wind data available',
+        ax.text(0.5, 0.5, 'No wind data available',
                 horizontalalignment='center',
                 verticalalignment='center',
                 transform=ax.transAxes)
@@ -236,44 +236,35 @@ def create_wind_rose(ax, data, title):
         return
 
     plot_data = data.copy()
-
     direction_bins = np.arange(0, 361, 45)
     directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
-
     mask = plot_data['wind_direction'].notna() & plot_data['wind_speed'].notna()
     plot_data = plot_data[mask]
-
     if plot_data.empty:
-        ax.text(0.5, 0.5, 'No valid wind data',
+        ax.text(0.5, 0.5, 'No valid wind data',
                 horizontalalignment='center',
                 verticalalignment='center',
                 transform=ax.transAxes)
         ax.set_title(title)
         return
-
-    plot_data.loc[:, 'direction_bin'] = pd.cut(plot_data['wind_direction'],
-                                               bins=direction_bins,
+    plot_data.loc[:, 'direction_bin'] = pd.cut(plot_data['wind_direction'],
+                                               bins=direction_bins,
                                                labels=directions,
                                                include_lowest=True)
-
     wind_stats = plot_data.groupby('direction_bin', observed=True)['wind_speed'].mean()
-
     all_directions = pd.Series(0.0, index=directions)
     wind_stats = wind_stats.combine_first(all_directions)
-
     angles = np.linspace(0, 2*np.pi, len(directions), endpoint=False)
     values = [wind_stats[d] for d in directions]
-
     if any(v > 0 for v in values):
         ax.bar(angles, values, width=0.5, alpha=0.6)
         ax.set_xticks(angles)
         ax.set_xticklabels(directions)
     else:
-        ax.text(0.5, 0.5, 'No significant wind',
+        ax.text(0.5, 0.5, 'No significant wind',
                 horizontalalignment='center',
                 verticalalignment='center',
                 transform=ax.transAxes)
-
     ax.set_title(title)
 
 def create_visualizations(df):
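On the binning above: `np.arange(0, 361, 45)` yields nine edges for the eight labels, and `include_lowest=True` keeps an exact 0-degree reading in the first bin. A short demonstration, with one subtlety worth knowing:

```python
import numpy as np
import pandas as pd

bins = np.arange(0, 361, 45)  # [0, 45, ..., 360]: 9 edges, 8 intervals
labels = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
binned = pd.cut(pd.Series([0, 44, 350]), bins=bins, labels=labels, include_lowest=True)
print(list(binned))  # ['N', 'N', 'NW']
# Caveat: 350 degrees is really a northerly wind; bins centered on the
# compass points would need edges shifted by 22.5 degrees.
```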
@@ -302,12 +293,11 @@ def create_visualizations(df):
     ax2.set_xlabel('')
     ax2.grid(True)
 
-    # Plot snow depth if available
     if 'snowDepth' in df.columns and not df['snowDepth'].isna().all():
         ax3.plot(df['timestamp'], df['snowDepth'], linewidth=2)
-        ax3.set_ylim(0, 80) # Fixed y-axis limit to 80 inches
+        ax3.set_ylim(0, 80)
     else:
-        ax3.text(0.5, 0.5, 'No snow depth data available',
+        ax3.text(0.5, 0.5, 'No snow depth data available',
                  horizontalalignment='center',
                  verticalalignment='center',
                  transform=ax3.transAxes)
@@ -316,7 +306,6 @@ def create_visualizations(df):
     ax3.set_xlabel('')
     ax3.grid(True)
 
-    # Format x-axis labels
     for ax in [ax1, ax2, ax3]:
         ax.tick_params(axis='x', rotation=45)
         ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d %H:%M'))
@@ -329,7 +318,6 @@ def create_visualizations(df):
         create_wind_rose(ax, day_data, pd.to_datetime(date).strftime('%Y-%m-%d'))
 
     plt.tight_layout()
-
     return fig
 
 def get_weather_data(station_id, hours):
  def get_weather_data(station_id, hours):
@@ -338,34 +326,29 @@ def get_weather_data(station_id, hours):
338
  Combines API data and scraped snow depth data.
339
  """
340
  try:
341
- # Get raw data from the API
342
  raw_data = get_raw_data(station_id)
343
  if raw_data is None:
344
  return None, "Failed to fetch data from API"
345
-
346
- # Parse raw API data
347
  df = parse_raw_data(raw_data)
348
  if df is None:
349
  return None, "Failed to parse API data"
350
-
351
- # Process the API data
352
  df = process_weather_data(df)
353
  if df is None:
354
  return None, "Failed to process API data"
355
 
356
- # Scrape snow depth data using Selenium and merge with API data
357
  snow_df = scrape_snow_depth()
358
  if not snow_df.empty:
359
  df = df.sort_values('timestamp')
360
  snow_df = snow_df.sort_values('timestamp')
361
- # Merge using nearest timestamp within a 30-minute tolerance
362
  df = pd.merge_asof(df, snow_df, on='timestamp', tolerance=pd.Timedelta('30min'), direction='nearest')
363
 
364
  print("\nProcessed combined data sample:")
365
  print(df.head())
366
 
367
  return df, None
368
-
369
  except Exception as e:
370
  return None, f"Error: {str(e)}"
371
 
@@ -374,30 +357,23 @@ def fetch_and_display(station_id, hours):
     Fetch data and create visualization.
     """
     df, error = get_weather_data(station_id, hours)
-
     if error:
         return None, error
-
     if df is not None and not df.empty:
         fig = create_visualizations(df)
         return fig, "Data fetched successfully!"
-
     return None, "No data available for the specified parameters."
 
 # Create Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Weather Data Viewer")
     gr.Markdown("Displays temperature, wind speed, and snow depth from NWS stations.")
-
     with gr.Row():
         station_id = gr.Textbox(label="Station ID", value="YCTIM")
         hours = gr.Slider(minimum=24, maximum=168, value=72, label="Hours of Data", step=24)
-
     fetch_btn = gr.Button("Fetch Data")
-
     plot_output = gr.Plot()
     message = gr.Textbox(label="Status")
-
     fetch_btn.click(
         fn=fetch_and_display,
         inputs=[station_id, hours],
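The diff is truncated after `inputs=[station_id, hours],`; a minimal sketch of how such a Blocks handler is typically completed and launched (the `outputs` wiring below is an assumption, not the file's actual code):

```python
import gradio as gr

def fetch_and_display(station_id, hours):
    # stand-in for the real function above
    return None, f"Would fetch {hours}h of data for {station_id}"

with gr.Blocks() as demo:
    station_id = gr.Textbox(label="Station ID", value="YCTIM")
    hours = gr.Slider(minimum=24, maximum=168, value=72, step=24, label="Hours of Data")
    fetch_btn = gr.Button("Fetch Data")
    plot_output = gr.Plot()
    message = gr.Textbox(label="Status")
    fetch_btn.click(fn=fetch_and_display, inputs=[station_id, hours],
                    outputs=[plot_output, message])

demo.launch()
```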
 