NihalGazi committed on
Commit
64d52e2
·
verified ·
1 Parent(s): c0bfa7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -146
app.py CHANGED
@@ -1,146 +1,147 @@
1
- import gradio as gr
2
- from playwright.sync_api import sync_playwright, Error as PlaywrightError
3
- import subprocess
4
- import os
5
- import time
6
-
7
- # --- Helper to ensure Playwright browser is installed ---
8
- # This function attempts to install the browser if not found.
9
- # It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
10
- def install_playwright_browser_if_needed():
11
- try:
12
- # Check if browser is callable by Playwright
13
- with sync_playwright() as p:
14
- try:
15
- browser = p.chromium.launch(headless=False)
16
- browser.close()
17
- print("Playwright Chromium browser is available.")
18
- return True
19
- except PlaywrightError:
20
- print("Playwright Chromium browser not found or not executable. Attempting installation.")
21
-
22
- # If not found, try to install.
23
- # The `packages.txt` should have installed most system dependencies.
24
- # This command installs the browser itself into Playwright's managed location.
25
- print("Attempting to install Playwright Chromium browser executable...")
26
- try:
27
- # Using subprocess to run the playwright install command
28
- subprocess.run(["playwright", "install", "chromium"], check=True, capture_output=True, text=True)
29
- print("Playwright Chromium executable installed successfully.")
30
- # Verify again
31
- with sync_playwright() as p:
32
- browser = p.chromium.launch(headless=False)
33
- browser.close()
34
- print("Playwright Chromium successfully verified after installation.")
35
- return True
36
- except subprocess.CalledProcessError as e:
37
- print(f"Playwright install chromium failed. STDERR: {e.stderr} STDOUT: {e.stdout}")
38
- return False
39
- except FileNotFoundError:
40
- print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
41
- return False
42
- except Exception as e:
43
- print(f"An error occurred during Playwright browser setup: {e}")
44
- return False
45
-
46
- # Run browser installation check when the app starts.
47
- # Logs will appear in the Hugging Face Space "Logs" tab.
48
- print("Initializing Space: Checking/Installing Playwright browser...")
49
- BROWSER_READY = install_playwright_browser_if_needed()
50
- if BROWSER_READY:
51
- print("Browser is ready.")
52
- else:
53
- print("WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work.")
54
-
55
- # --- Screenshot function ---
56
- def take_web_screenshot(url: str):
57
- if not BROWSER_READY:
58
- return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."
59
-
60
- if not url:
61
- return None, "Please enter a website URL."
62
-
63
- # Prepend https:// if no scheme is present, as Playwright requires it.
64
- if not (url.startswith("http://") or url.startswith("https://")):
65
- url = "https://" + url
66
-
67
- screenshot_path = None # Initialize here
68
-
69
- try:
70
- with sync_playwright() as p:
71
- browser = p.chromium.launch(headless=False)
72
- context = browser.new_context(
73
- user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
74
- )
75
- page = context.new_page()
76
-
77
- # Set a common viewport size
78
- page.set_viewport_size({"width": 1280, "height": 720})
79
-
80
- print(f"Navigating to URL: {url}")
81
- # Increased timeout and wait_until 'domcontentloaded' or 'load' can be more reliable
82
- page.goto(url, timeout=60000, wait_until="domcontentloaded") # 60 seconds timeout
83
-
84
- # Give some time for lazy-loaded elements if necessary, though 'networkidle' can be slow/unreliable
85
- # page.wait_for_timeout(2000) # Optional: 2 seconds, adjust as needed
86
-
87
- # Create a unique filename for the screenshot
88
- os.makedirs("screenshots", exist_ok=True) # Ensure screenshots directory exists
89
- timestamp = time.strftime("%Y%m%d-%H%M%S")
90
- # Simple sanitization for filename
91
- sanitized_url_part = "".join(c if c.isalnum() else "_" for c in url.split("://")[-1])[:50]
92
- screenshot_path = f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"
93
-
94
- print(f"Taking screenshot and saving to: {screenshot_path}")
95
- page.screenshot(path=screenshot_path, full_page=False) # Captures the viewport
96
-
97
- browser.close()
98
- print("Screenshot successful.")
99
- return screenshot_path, f"Screenshot of {url} captured successfully!"
100
-
101
- except PlaywrightError as e:
102
- error_message_detail = str(e)
103
- print(f"Playwright error: {error_message_detail}")
104
- if "net::ERR_NAME_NOT_RESOLVED" in error_message_detail:
105
- status_message = f"Error: The URL '{url}' could not be resolved. Please check the domain name."
106
- elif "Timeout" in error_message_detail:
107
- status_message = f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
108
- elif "SSL_ERROR" in error_message_detail or "ssl_error" in error_message_detail.lower():
109
- status_message = f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
110
- else:
111
- status_message = f"Error: Could not take screenshot of '{url}'. Details: {error_message_detail[:200]}..."
112
- # If a screenshot was partially created or an old one exists, don't return it on error
113
- return None, status_message
114
- except Exception as e:
115
- print(f"An unexpected error occurred: {e}")
116
- return None, f"An unexpected error occurred: {str(e)[:200]}..."
117
-
118
- # --- Gradio Interface ---
119
- iface = gr.Interface(
120
- fn=take_web_screenshot,
121
- inputs=gr.Textbox(
122
- label="Website URL",
123
- placeholder="e.g., https://www.example.com or example.com"
124
- ),
125
- outputs=[
126
- gr.Image(type="filepath", label="Website Screenshot"),
127
- gr.Textbox(label="Status")
128
- ],
129
- title="Website Screenshot Taker 📸",
130
- description=(
131
- "Enter a full website URL to capture a screenshot of its current view. "
132
- "Prefix with http:// or https:// for best results."
133
- ),
134
- examples=[
135
- ["https://gradio.app"],
136
- ["huggingface.co/spaces"],
137
- ["en.wikipedia.org/wiki/Python_(programming_language)"]
138
- ],
139
- allow_flagging="never",
140
- css=".gradio-container {max-width: 960px !important; margin: auto !important;}" # Optional: center and limit width
141
- )
142
-
143
- # --- Main launch ---
144
- if __name__ == "__main__":
145
- print("Starting Gradio application...")
146
- iface.launch()
 
 
1
+ import gradio as gr
2
+ from playwright.sync_api import sync_playwright, Error as PlaywrightError
3
+ import subprocess
4
+ import os
5
+ import time
6
+
7
# --- Helper to ensure Playwright browser is installed ---
# Best-effort for standard Gradio spaces; a Dockerfile is more robust.
def _launch_chromium_once():
    """Start headless Chromium through Playwright and close it again.

    Any exception raised while launching or closing propagates to the caller.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        browser.close()


def install_playwright_browser_if_needed():
    """Make sure Playwright can launch headless Chromium, installing it if not.

    A trial launch is attempted first. If it raises a Playwright error, the
    Chromium executable is installed via the Playwright CLI and the launch is
    tried once more.

    Returns:
        True if a trial launch succeeded (before or after installation),
        False otherwise. Ordinary exceptions are caught, reported with
        ``print`` and turned into a False result.
    """
    import sys  # local import: only needed to locate the running interpreter

    try:
        # Check if browser is callable by Playwright.
        try:
            _launch_chromium_once()
        except PlaywrightError:
            print("Playwright Chromium browser not found or not executable. Attempting installation.")
        else:
            print("Playwright Chromium browser is available.")
            return True

        # The `packages.txt` should have installed most system dependencies.
        # This command installs the browser itself into Playwright's managed location.
        print("Attempting to install Playwright Chromium browser executable...")
        try:
            # Run the CLI as a module of the current interpreter so the install
            # does not depend on a `playwright` script being on PATH and always
            # targets the same environment the app imports Playwright from.
            subprocess.run(
                [sys.executable, "-m", "playwright", "install", "chromium"],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Playwright install chromium failed. STDERR: {e.stderr} STDOUT: {e.stdout}")
            return False
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False

        print("Playwright Chromium executable installed successfully.")
        # Verify again; a failure here is reported by the generic handler below.
        _launch_chromium_once()
        print("Playwright Chromium successfully verified after installation.")
        return True
    except Exception as e:
        print(f"An error occurred during Playwright browser setup: {e}")
        return False
45
+
46
# Probe for the browser (installing it if necessary) once, at import time.
# Output shows up in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)
54
+
55
# --- Screenshot function ---
def _normalize_url(url):
    """Return *url* without surrounding whitespace and with a scheme, or "" if blank."""
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    # Compare case-insensitively so "HTTPS://host" is not prefixed a second time.
    if not cleaned.lower().startswith(("http://", "https://")):
        cleaned = "https://" + cleaned
    return cleaned


def _describe_playwright_error(url, detail):
    """Map the text of a Playwright error to the status message shown to the user."""
    lowered = detail.lower()
    if "net::ERR_NAME_NOT_RESOLVED" in detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    if "Timeout" in detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    # Chromium reports certificate/TLS failures as net::ERR_CERT_* / net::ERR_SSL_*;
    # the "ssl_error" spelling is kept for backward compatibility.
    if "ssl_error" in lowered or "err_cert" in lowered or "err_ssl" in lowered:
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {detail[:200]}..."


def take_web_screenshot(url: str):
    """Capture a 1280x720 viewport screenshot of *url* with headless Chromium.

    Args:
        url: Address to capture. Surrounding whitespace is ignored and
            ``https://`` is prepended when no http/https scheme is present.

    Returns:
        A ``(path, status)`` tuple. On success ``path`` is the PNG written
        under ``screenshots/`` and ``status`` a confirmation message; on any
        failure ``path`` is ``None`` and ``status`` describes the problem.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    url = _normalize_url(url)
    if not url:
        return None, "Please enter a website URL."

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
                )
                page = context.new_page()

                # Set a common viewport size.
                page.set_viewport_size({"width": 1280, "height": 720})

                print(f"Navigating to URL: {url}")
                # Wait only for DOMContentLoaded, with a 60 second ceiling.
                page.goto(url, timeout=60000, wait_until="domcontentloaded")

                # Build the file name from the sanitized URL plus a timestamp.
                os.makedirs("screenshots", exist_ok=True)
                timestamp = time.strftime("%Y%m%d-%H%M%S")
                sanitized_url_part = "".join(
                    c if c.isalnum() else "_" for c in url.split("://")[-1]
                )[:50]
                screenshot_path = f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"

                print(f"Taking screenshot and saving to: {screenshot_path}")
                page.screenshot(path=screenshot_path, full_page=False)  # viewport only
            finally:
                # Close the browser even when navigation or capture fails.
                browser.close()

        print("Screenshot successful.")
        return screenshot_path, f"Screenshot of {url} captured successfully!"

    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # Never hand back an image path on error.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."
117
+
118
# --- Gradio Interface ---
# One URL text box in; a screenshot image (as a file path) and a status line out.
iface = gr.Interface(
    fn=take_web_screenshot,
    title="Website Screenshot Taker 📸",
    description=(
        "Enter a full website URL to capture a screenshot of its current view. "
        "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
        "Prefix with http:// or https:// for best results."
    ),
    inputs=gr.Textbox(
        label="Website URL",
        placeholder="e.g., https://www.example.com or example.com",
    ),
    outputs=[
        gr.Image(type="filepath", label="Website Screenshot"),
        gr.Textbox(label="Status"),
    ],
    examples=[
        ["https://gradio.app"],
        ["huggingface.co/spaces"],
        ["en.wikipedia.org/wiki/Python_(programming_language)"],
    ],
    allow_flagging="never",
    # Optional: center the app and limit its width.
    css=".gradio-container {max-width: 960px !important; margin: auto !important;}",
)

# --- Main launch ---
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    print("Starting Gradio application...")
    iface.launch()