import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from windrose import WindroseAxes
from datetime import datetime, timedelta
from playwright.sync_api import sync_playwright
import time
import os
import subprocess
import sys
from PIL import Image
import io
from zoneinfo import ZoneInfo
import re
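
# The scraping functions below drive the Bridger Bowl weather-history page through
# Playwright. The numeric element IDs they reference ('48' for the location select,
# '50'/'51'/'52' for the month/day/year selects) reflect the page as it existed when
# this script was written and may change if the site is updated.
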
def download_playwright_browsers():
    """Install the Playwright Chromium browser if it is not already present."""
    try:
        print("Installing Playwright browsers...")
        result = subprocess.run(
            [sys.executable, "-m", "playwright", "install", "chromium"],
            capture_output=True,
            text=True
        )
        print("Installation output:", result.stdout)
        if result.stderr:
            print("Installation errors:", result.stderr)
        return True
    except Exception as e:
        print(f"Error installing browsers: {e}")
        return False


def extract_day_data(page):
    """Extract the date and data table from the currently displayed day."""
    try:
        # Read the date from the month/day/year select elements
        date_text = page.evaluate('''() => {
            const month = document.getElementById('50').value;
            const day = document.getElementById('51').value;
            const year = document.getElementById('52').value;
            return `${month}/${day}/${year}`;
        }''')
        # Extract table headers and rows
        table_data = page.evaluate('''() => {
            const table = document.querySelector('table.data-table');
            if (!table) return null;
            const headers = Array.from(table.querySelectorAll('thead th')).map(th => th.textContent.trim());
            const rows = Array.from(table.querySelectorAll('tbody tr')).map(tr => {
                return Array.from(tr.querySelectorAll('td')).map(td => td.textContent.trim());
            });
            return {headers, rows};
        }''')
        return date_text, table_data
    except Exception as e:
        print(f"Error extracting data: {str(e)}")
        return None, None


def navigate_to_previous_day(page):
    """Click the 'Previous Day' button and wait for the page to update."""
    try:
        # Click the previous day button
        page.evaluate('''() => {
            document.querySelector('button.date-nav-btn[title="Previous Day"]').click();
        }''')
        # Wait for the page to update
        time.sleep(2)
        # Read the date selectors back as a sanity check that navigation took effect
        page.evaluate('''() => {
            return {
                month: document.getElementById('50').value,
                day: document.getElementById('51').value,
                year: document.getElementById('52').value
            };
        }''')
        return True
    except Exception as e:
        print(f"Error navigating to previous day: {str(e)}")
        return False


def convert_to_dataframe(scraped_data):
    """Convert scraped rows into a cleaned pandas DataFrame."""
    if not scraped_data:
        return pd.DataFrame()
    all_rows = []
    for item in scraped_data:
        date_str = item['date']
        headers = item['headers']
        row_data = item['data']
        location = item['location']
        # Build a dict for this row, mapping normalized header names to cell values
        row_dict = {'date': date_str, 'location': location}
        for i, header in enumerate(headers):
            if i < len(row_data):
                row_dict[header.lower().replace(' ', '_')] = row_data[i]
        all_rows.append(row_dict)
    df = pd.DataFrame(all_rows)
    # Map wind direction to degrees before the numeric cleanup below, so cardinal
    # values such as "NW" are not destroyed by stripping non-numeric characters
    if 'wind_dir' in df.columns:
        direction_map = {
            'N': 0, 'NNE': 22.5, 'NE': 45, 'ENE': 67.5, 'E': 90, 'ESE': 112.5,
            'SE': 135, 'SSE': 157.5, 'S': 180, 'SSW': 202.5, 'SW': 225, 'WSW': 247.5,
            'W': 270, 'WNW': 292.5, 'NW': 315, 'NNW': 337.5
        }
        df['wind_dir_deg'] = df['wind_dir'].map(
            lambda x: direction_map.get(x.strip().upper(), np.nan) if isinstance(x, str) else np.nan
        )
        # Fall back to a numeric parse in case the station reports degrees directly
        df['wind_dir_deg'] = df['wind_dir_deg'].fillna(
            pd.to_numeric(df['wind_dir'].astype(str).str.replace(r'[^\d.-]', '', regex=True), errors='coerce')
        )
    # Convert numeric columns, stripping any non-numeric characters (units, symbols)
    numeric_columns = ['temp', 'rh', 'wind_speed', 'wind_gust', 'wind_dir', 'snow_depth', 'new_snow', 'h2o']
    for col in numeric_columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col].str.replace(r'[^\d.-]', '', regex=True), errors='coerce')
    # Combine date and time into a datetime column, then keep a date-only column for grouping
    if 'time' in df.columns and 'date' in df.columns:
        df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'], format='%m/%d/%Y %I:%M %p', errors='coerce')
        df['date'] = df['datetime'].dt.date
    return df


def calculate_daily_snow(df):
    """Sum new snow by date to get daily accumulation."""
    daily_snow = df.groupby('date')['new_snow'].sum()
    return daily_snow


def scrape_location_data(page, location_id, num_days):
    """Scrape daily tables for one location, stepping backwards one day at a time."""
    # Select the location in the site's dropdown and fire its change event
    page.evaluate(f'''() => {{
        const locationSelect = document.getElementById('48');
        locationSelect.value = "{location_id}";
        locationSelect.dispatchEvent(new Event('change', {{ bubbles: true }}));
    }}''')
    time.sleep(3)  # Wait for the location change to take effect
    all_data = []
    for day in range(num_days):
        print(f"\nProcessing {location_id} - day {day + 1} of {num_days}")
        # Read the currently selected date
        current_date = page.evaluate('''() => {
            return {
                month: document.getElementById('50').value,
                day: document.getElementById('51').value,
                year: document.getElementById('52').value
            };
        }''')
        date_str = f"{current_date['month']}/{current_date['day']}/{current_date['year']}"
        print(f"Processing date: {date_str}")
        # Extract the table for this day
        _, table_data = extract_day_data(page)
        if table_data and table_data['rows']:
            rows_found = len(table_data['rows'])
            print(f"Found {rows_found} rows of data")
            for row in table_data['rows']:
                all_data.append({
                    'date': date_str,
                    'headers': table_data['headers'],
                    'data': row,
                    'location': location_id
                })
        else:
            print(f"No data found for {date_str}")
        # Navigate to the previous day unless this is the last iteration,
        # even when the current day returned no rows
        if day < num_days - 1:
            print(f"Navigating to previous day from {date_str}...")
            if not navigate_to_previous_day(page):
                print("Failed to navigate to previous day!")
                break
            time.sleep(3)
    return all_data


def create_comparison_plots(df_alpine, df_ridge=None):
    """Create weather plots, with an optional Ridge data overlay."""
    fig = plt.figure(figsize=(20, 24))
    gs = GridSpec(5, 1, figure=fig, height_ratios=[1, 1, 1, 1, 1])
    gs.update(hspace=0.4)
    # Temperature plot
    ax1 = fig.add_subplot(gs[0])
    ax1.plot(df_alpine['datetime'], df_alpine['temp'], label='Alpine Temperature', color='red', linewidth=2)
    if df_ridge is not None:
        ax1.plot(df_ridge['datetime'], df_ridge['temp'], label='Ridge Temperature', color='darkred', linewidth=2, linestyle='--')
    ax1.set_title('Temperature Over Time', pad=20, fontsize=14)
    ax1.set_xlabel('Date', fontsize=12)
    ax1.set_ylabel('Temperature (°F)', fontsize=12)
    ax1.legend(fontsize=12)
    ax1.grid(True, alpha=0.3)
    ax1.tick_params(axis='x', rotation=45)
    # Wind speed plot
    ax2 = fig.add_subplot(gs[1])
    ax2.plot(df_alpine['datetime'], df_alpine['wind_speed'], label='Alpine Wind Speed', color='blue', linewidth=2)
    ax2.plot(df_alpine['datetime'], df_alpine['wind_gust'], label='Alpine Wind Gust', color='orange', linewidth=2)
    if df_ridge is not None:
        ax2.plot(df_ridge['datetime'], df_ridge['wind_speed'], label='Ridge Wind Speed', color='darkblue', linewidth=2, linestyle='--')
        ax2.plot(df_ridge['datetime'], df_ridge['wind_gust'], label='Ridge Wind Gust', color='darkorange', linewidth=2, linestyle='--')
    ax2.set_title('Wind Speed and Gusts Over Time', pad=20, fontsize=14)
    ax2.set_xlabel('Date', fontsize=12)
    ax2.set_ylabel('Wind Speed (mph)', fontsize=12)
    ax2.legend(fontsize=12)
    ax2.grid(True, alpha=0.3)
    ax2.tick_params(axis='x', rotation=45)
    # Snow depth plot
    ax3 = fig.add_subplot(gs[2])
    ax3.plot(df_alpine['datetime'], df_alpine['snow_depth'], color='blue', label='Alpine Snow Depth', linewidth=2)
    if df_ridge is not None:
        ax3.plot(df_ridge['datetime'], df_ridge['snow_depth'], color='darkblue', label='Ridge Snow Depth', linewidth=2, linestyle='--')
    ax3.set_title('Snow Depth Over Time', pad=20, fontsize=14)
    ax3.set_xlabel('Date', fontsize=12)
    ax3.set_ylabel('Snow Depth (inches)', fontsize=12)
    ax3.legend(fontsize=12)
    ax3.grid(True, alpha=0.3)
    ax3.tick_params(axis='x', rotation=45)
    # Daily new snow bar plot
    ax4 = fig.add_subplot(gs[3])
    daily_snow_alpine = calculate_daily_snow(df_alpine)
    bar_width = 0.35
    if df_ridge is not None:
        # Align Ridge totals to the Alpine dates and plot side-by-side bars on numeric
        # positions (date objects cannot be offset by a float directly)
        daily_snow_ridge = calculate_daily_snow(df_ridge).reindex(daily_snow_alpine.index)
        x = np.arange(len(daily_snow_alpine))
        ax4.bar(x - bar_width/2, daily_snow_alpine.values,
                bar_width, color='blue', alpha=0.7, label='Alpine')
        ax4.bar(x + bar_width/2, daily_snow_ridge.values,
                bar_width, color='darkblue', alpha=0.7, label='Ridge')
        ax4.set_xticks(x)
        ax4.set_xticklabels([str(d) for d in daily_snow_alpine.index])
    else:
        ax4.bar(daily_snow_alpine.index, daily_snow_alpine.values, color='blue', alpha=0.7)
    ax4.set_title('Daily New Snow (4PM to 4PM)', pad=20, fontsize=14)
    ax4.set_xlabel('Date', fontsize=12)
    ax4.set_ylabel('New Snow (inches)', fontsize=12)
    ax4.tick_params(axis='x', rotation=45)
    ax4.grid(True, alpha=0.3)
    if df_ridge is not None:
        ax4.legend()
    # H2O (SWE) plot
    ax5 = fig.add_subplot(gs[4])
    daily_swe_alpine = df_alpine.groupby('date')['h2o'].mean()
    if df_ridge is not None:
        daily_swe_ridge = df_ridge.groupby('date')['h2o'].mean().reindex(daily_swe_alpine.index)
        x = np.arange(len(daily_swe_alpine))
        ax5.bar(x - bar_width/2, daily_swe_alpine.values,
                bar_width, color='lightblue', alpha=0.7, label='Alpine')
        ax5.bar(x + bar_width/2, daily_swe_ridge.values,
                bar_width, color='steelblue', alpha=0.7, label='Ridge')
        ax5.set_xticks(x)
        ax5.set_xticklabels([str(d) for d in daily_swe_alpine.index])
    else:
        ax5.bar(daily_swe_alpine.index, daily_swe_alpine.values, color='lightblue', alpha=0.7)
    ax5.set_title('Snow/Water Equivalent', pad=20, fontsize=14)
    ax5.set_xlabel('Date', fontsize=12)
    ax5.set_ylabel('SWE (inches)', fontsize=12)
    ax5.tick_params(axis='x', rotation=45)
    ax5.grid(True, alpha=0.3)
    if df_ridge is not None:
        ax5.legend()
    plt.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95)
    # Create wind rose (Alpine only); drop rows where either direction or speed is
    # missing so the two arrays stay aligned
    fig_rose = plt.figure(figsize=(10, 10))
    ax_rose = WindroseAxes.from_ax(fig=fig_rose)
    wind = df_alpine[['wind_dir_deg', 'wind_speed']].dropna()
    ax_rose.bar(wind['wind_dir_deg'], wind['wind_speed'],
                bins=np.arange(0, 40, 5), normed=True, opening=0.8, edgecolor='white')
    ax_rose.set_legend(title='Wind Speed (mph)', fontsize=10)
    ax_rose.set_title('Wind Rose (Alpine)', fontsize=14, pad=20)
    fig_rose.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95)
    return fig, fig_rose


def analyze_weather_data(days=3, include_ridge=False):
    """Scrape the requested number of days and build statistics and plots."""
    try:
        # Gradio's Number component may pass a float, so coerce to int for range()
        days = int(days)
        print("Launching browser...")
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-dev-shm-usage']
            )
            context = browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                timezone_id='America/Denver',
                locale='en-US'
            )
            page = context.new_page()
            page.goto("https://bridgerbowl.com/weather/history-tables/alpine")
            page.wait_for_load_state('networkidle')
            time.sleep(5)
            # Scrape Alpine data
            print("\nScraping Alpine data...")
            alpine_data = scrape_location_data(page, "alpine", days)
            df_alpine = convert_to_dataframe(alpine_data)
            # Scrape Ridge data if requested
            df_ridge = None
            if include_ridge:
                print("\nScraping Ridge data...")
                ridge_data = scrape_location_data(page, "ridge", days)
                df_ridge = convert_to_dataframe(ridge_data)
            # Create plots
            main_plots, wind_rose = create_comparison_plots(df_alpine, df_ridge)
            # Calculate summary statistics
            alpine_snow = calculate_daily_snow(df_alpine)
            stats = {
                'Alpine Temperature Range': f"{df_alpine['temp'].min():.1f}°F to {df_alpine['temp'].max():.1f}°F",
                'Alpine Max Wind Speed': f"{df_alpine['wind_speed'].max():.1f} mph",
                'Alpine Snow Depth': f"{df_alpine['snow_depth'].iloc[0]:.1f} inches",
                'Alpine Total New Snow': f"{alpine_snow.sum():.1f} inches"
            }
            if include_ridge:
                ridge_snow = calculate_daily_snow(df_ridge)
                stats.update({
                    'Ridge Temperature Range': f"{df_ridge['temp'].min():.1f}°F to {df_ridge['temp'].max():.1f}°F",
                    'Ridge Max Wind Speed': f"{df_ridge['wind_speed'].max():.1f} mph",
                    'Ridge Snow Depth': f"{df_ridge['snow_depth'].iloc[0]:.1f} inches",
                    'Ridge Total New Snow': f"{ridge_snow.sum():.1f} inches"
                })
            # Build the HTML report
            html_report = "<h3>Weather Statistics:</h3>"
            for key, value in stats.items():
                html_report += f"<p><strong>{key}:</strong> {value}</p>"
            browser.close()
            return html_report, main_plots, wind_rose
    except Exception as e:
        print(f"Error during analysis: {str(e)}")
        return f"Error during analysis: {str(e)}", None, None


# Create the Gradio interface
with gr.Blocks(title="Bridger Bowl Weather Analyzer") as demo:
    gr.Markdown("# Bridger Bowl Weather Analyzer")
    gr.Markdown("""
    Analyze weather data from Bridger Bowl's weather stations.
    Specify how many days of historical data to analyze and whether to include Ridge data.
    """)
    with gr.Row():
        days_input = gr.Number(
            label="Number of Days to Analyze",
            value=3,
            minimum=1,
            maximum=31
        )
        include_ridge = gr.Checkbox(
            label="Include Ridge Data",
            value=False
        )
    analyze_btn = gr.Button("Collect and Analyze Weather Data")
    with gr.Row():
        stats_output = gr.HTML(label="Statistics and Data Collection Info")
    with gr.Row():
        weather_plots = gr.Plot(label="Weather Plots")
        wind_rose = gr.Plot(label="Wind Rose")
    analyze_btn.click(
        fn=analyze_weather_data,
        inputs=[days_input, include_ridge],
        outputs=[stats_output, weather_plots, wind_rose]
    )


if __name__ == "__main__":
    # Install Playwright browsers before launching the app
    print("Setting up Playwright...")
    download_playwright_browsers()
    print("Launching Gradio interface...")
    demo.launch()