nakas committed on
Commit 5bed1bd · verified · 1 Parent(s): 72bdaa6

Update app.py

Files changed (1)
  1. app.py +107 -103
app.py CHANGED
@@ -1,111 +1,115 @@
  import gradio as gr
- from playwright.sync_api import sync_playwright
- import time
- import json
- from download_browsers import download_playwright_browsers

- # Download browsers on startup
- download_playwright_browsers()
-
- def scrape_website(url, wait_time=5):
-     """
-     Scrape a website using Playwright headless browser
-     Args:
-         url (str): The URL to scrape
-         wait_time (int): Time to wait for dynamic content to load
-     Returns:
-         dict: Dictionary containing scraped data
-     """
      try:
-         with sync_playwright() as p:
-             # Launch browser in headless mode
-             browser = p.chromium.launch(headless=True)
-             context = browser.new_context(
-                 user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
-             )
-             page = context.new_page()
-
-             # Go to URL and wait for network to be idle
-             page.goto(url, wait_until="networkidle")
-             time.sleep(wait_time)  # Additional wait for dynamic content
-
-             # Get basic page information
-             title = page.title()
-
-             # Extract all text content
-             text_content = page.text_content('body')
-
-             # Extract all links
-             links = page.eval_on_selector_all('a[href]', 'elements => elements.map(el => el.href)')
-
-             # Extract all images
-             images = page.eval_on_selector_all('img[src]', 'elements => elements.map(el => el.src)')
-
-             # Get meta description
-             meta_description = page.eval_on_selector('meta[name="description"]',
-                 'element => element.content') if page.query_selector('meta[name="description"]') else ''
-
-             # Close browser
-             browser.close()
-
-             return {
-                 "title": title,
-                 "meta_description": meta_description,
-                 "text_content": text_content[:1000] + "...",  # Truncate for display
-                 "links": links[:10],  # Show first 10 links
-                 "images": images[:5],  # Show first 5 images
-                 "status": "Success"
-             }
-
-     except Exception as e:
-         return {
-             "status": "Error",
-             "error_message": str(e)
-         }

- def format_output(result):
-     """Format the output for better display in Gradio"""
-     if result["status"] == "Error":
-         return f"Error: {result['error_message']}"

-     output = f"""
- ### Page Title
- {result['title']}
-
- ### Meta Description
- {result['meta_description']}
-
- ### First 1000 characters of content
- {result['text_content']}
-
- ### First 10 Links
- {json.dumps(result['links'], indent=2)}
-
- ### First 5 Images
- {json.dumps(result['images'], indent=2)}
- """
-     return output

- # Create Gradio interface
- iface = gr.Interface(
-     fn=lambda url, wait_time: format_output(scrape_website(url, wait_time)),
-     inputs=[
-         gr.Textbox(label="URL to scrape", placeholder="https://example.com"),
-         gr.Slider(minimum=1, maximum=15, value=5, step=1, label="Wait time (seconds)")
-     ],
-     outputs=gr.Markdown(),
-     title="Web Scraper with Headless Browser",
-     description="""
-     Enter a URL to scrape its content using a headless browser.
-     The tool will extract the title, meta description, text content, links, and images.
-     Please use responsibly and respect websites' terms of service and robots.txt files.
-     """,
-     examples=[
-         ["https://example.com", 5],
-         ["https://news.ycombinator.com", 8]
-     ]
- )

- # Launch the interface
  if __name__ == "__main__":
-     iface.launch()
 
  import gradio as gr
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt

+ def parse_wind(wind_str):
+     """Parse a wind value such as '15' or '15G25' into (speed, gust)."""
+     if pd.isna(wind_str):
+         return np.nan, np.nan
      try:
+         if 'G' in str(wind_str):
+             speed, gust = str(wind_str).split('G')
+             return float(speed), float(gust)
+         return float(wind_str), np.nan
+     except (ValueError, TypeError):
+         return np.nan, np.nan
+
+ def process_weather_data(data_str):
+     # Split the data into lines and drop empty lines
+     lines = [line.strip() for line in data_str.split('\n') if line.strip()]
+
+     # Find the start of the actual data (after the header row)
+     start_idx = 0
+     for i, line in enumerate(lines):
+         if 'Date/Time' in line:
+             start_idx = i + 1
+             break
+
+     # Process the tab-separated data lines
+     data = []
+     for line in lines[start_idx:]:
+         parts = line.split('\t')
+         if len(parts) >= 8:
+             try:
+                 date_str = parts[0].strip()
+                 temp = float(parts[1]) if parts[1].strip() else np.nan
+                 dew_point = float(parts[2]) if parts[2].strip() else np.nan
+                 humidity = float(parts[3]) if parts[3].strip() else np.nan
+                 wind_chill = float(parts[4]) if parts[4].strip() else np.nan
+                 wind_speed, wind_gust = parse_wind(parts[6])
+                 snow_depth = float(parts[7]) if parts[7].strip() else np.nan
+
+                 data.append({
+                     'datetime': pd.to_datetime(date_str),
+                     'temperature': temp,
+                     'dew_point': dew_point,
+                     'humidity': humidity,
+                     'wind_chill': wind_chill,
+                     'wind_speed': wind_speed,
+                     'wind_gust': wind_gust,
+                     'snow_depth': snow_depth
+                 })
+             except ValueError:
+                 # Skip rows that fail to parse
+                 continue
+
+     df = pd.DataFrame(data)
+     if df.empty:
+         return "<p>No parseable data rows were found.</p>", None, None
+
+     # Calculate summary statistics
+     stats = {
+         'Temperature Range': f"{df['temperature'].min():.1f}°F to {df['temperature'].max():.1f}°F",
+         'Average Temperature': f"{df['temperature'].mean():.1f}°F",
+         'Max Wind Speed': f"{df['wind_speed'].max():.1f} mph",
+         'Max Wind Gust': f"{df['wind_gust'].max():.1f} mph",
+         'Average Humidity': f"{df['humidity'].mean():.1f}%",
+         'Max Snow Depth': f"{df['snow_depth'].max():.1f} inches"
+     }
+
+     # Create matplotlib figures; the gr.Plot outputs render them directly
+     temp_fig, temp_ax = plt.subplots(figsize=(12, 6))
+     df.plot(x='datetime', y=['temperature', 'wind_chill'], ax=temp_ax,
+             title='Temperature and Wind Chill Over Time')
+
+     wind_fig, wind_ax = plt.subplots(figsize=(12, 6))
+     df.plot(x='datetime', y=['wind_speed', 'wind_gust'], ax=wind_ax,
+             title='Wind Speed and Gusts Over Time')
+
+     stats_html = "<div style='font-size: 16px; line-height: 1.5;'>"
+     for key, value in stats.items():
+         stats_html += f"<p><strong>{key}:</strong> {value}</p>"
+     stats_html += "</div>"
+
+     return stats_html, temp_fig, wind_fig

+ # Create the Gradio interface
+ with gr.Blocks(title="Weather Data Analysis") as demo:
+     gr.Markdown("# Weather Data Analysis")
+     gr.Markdown("Paste weather data from weather.gov/wrh/timeseries in the format shown below:")
+
+     with gr.Row():
+         input_text = gr.Textbox(
+             label="Weather Data",
+             placeholder="Paste weather data here...",
+             lines=10
+         )
+
+     analyze_btn = gr.Button("Analyze Weather Data")
+
+     with gr.Row():
+         stats_output = gr.HTML(label="Statistics")
+
+     with gr.Row():
+         temp_plot = gr.Plot(label="Temperature Plot")
+         wind_plot = gr.Plot(label="Wind Plot")
+
+     analyze_btn.click(
+         fn=process_weather_data,
+         inputs=[input_text],
+         outputs=[stats_output, temp_plot, wind_plot]
+     )

  if __name__ == "__main__":
+     demo.launch()
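
For reference, a quick sanity check of the new parse_wind helper. The sample values below are illustrative, assuming the weather.gov timeseries reports wind as "speed" or "speedGgust" (e.g. "15G25"), which is the convention the code splits on:

# Hypothetical usage sketch, not part of the commit: exercises parse_wind
# from the new app.py under the assumed "speedGgust" notation.
from app import parse_wind

print(parse_wind("15G25"))  # -> (15.0, 25.0): sustained 15 mph, gusting to 25
print(parse_wind("10"))     # -> (10.0, nan): no gust reported
print(parse_wind(""))       # -> (nan, nan): unparseable value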