NihalGazi committed on
Commit
7173bd5
·
verified ·
1 Parent(s): 2cb9ffd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -26
app.py CHANGED
@@ -1,32 +1,147 @@
1
  import gradio as gr
2
- import asyncio
3
- from pyppeteer import launch
4
-
5
- async def _screenshot(url: str, width: int = 1280, height: int = 720) -> bytes:
6
- browser = await launch(options={
7
- 'args': ['--no-sandbox', '--disable-setuid-sandbox'],
8
- 'headless': True
9
- })
10
- page = await browser.newPage()
11
- await page.setViewport({'width': width, 'height': height})
12
- await page.goto(url, {'waitUntil': 'networkidle2'})
13
- img_bytes = await page.screenshot({'fullPage': True})
14
- await browser.close()
15
- return img_bytes
16
-
17
-
18
- def screenshot(url: str) -> bytes:
19
- if not url.startswith(('http://', 'https://')):
20
- url = 'http://' + url
21
- return asyncio.get_event_loop().run_until_complete(_screenshot(url))
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  iface = gr.Interface(
24
- fn=screenshot,
25
- inputs=gr.Textbox(label="Website URL", placeholder="https://example.com"),
26
- outputs=gr.Image(type="pil", label="Screenshot"),
27
- title="Website Screenshot Generator",
28
- description="Enter a URL and get a full-page screenshot back. Powered by Pyppeteer."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
 
 
31
  if __name__ == "__main__":
32
- iface.launch()
 
 
1
  import gradio as gr
2
+ from playwright.sync_api import sync_playwright, Error as PlaywrightError
3
+ import subprocess
4
+ import os
5
+ import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # --- Helper to ensure Playwright browser is installed ---
8
+ # This function attempts to install the browser if not found.
9
+ # It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
10
def install_playwright_browser_if_needed(install_timeout: float = 600.0) -> bool:
    """Make sure Playwright can launch headless Chromium, installing it if needed.

    The function first tries to launch Chromium. If that fails with a
    Playwright error it runs ``playwright install chromium`` and then tries
    the launch again to verify the installation.

    Args:
        install_timeout: Maximum number of seconds to wait for the install
            command before giving up, so a stalled download cannot block
            application startup forever.

    Returns:
        True when Chromium could be launched (before or after installation),
        False otherwise. Failures are reported via ``print`` and never raised.
    """
    # Local import: only this function needs it.
    import sys

    def _launch_and_close() -> None:
        # Raises PlaywrightError when the browser executable is missing or
        # cannot be started.
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            browser.close()

    try:
        try:
            _launch_and_close()
            print("Playwright Chromium browser is available.")
            return True
        except PlaywrightError as launch_error:
            print("Playwright Chromium browser not found or not executable. Attempting installation.")
            # Keep the reason in the logs; it distinguishes a missing browser
            # from missing system libraries.
            print(f"Initial launch error: {launch_error}")

        # This installs only the browser binary into Playwright's managed
        # location; system libraries must already be present.
        print("Attempting to install Playwright Chromium browser executable...")
        try:
            # Run the CLI through the current interpreter so it works even
            # when the `playwright` console script is not on PATH.
            subprocess.run(
                [sys.executable, "-m", "playwright", "install", "chromium"],
                check=True,
                capture_output=True,
                text=True,
                timeout=install_timeout,
            )
            print("Playwright Chromium executable installed successfully.")
            # Verify again; a PlaywrightError here falls through to the
            # generic handler below and is logged with its message.
            _launch_and_close()
            print("Playwright Chromium successfully verified after installation.")
            return True
        except subprocess.CalledProcessError as e:
            print(f"Playwright install chromium failed. STDERR: {e.stderr} STDOUT: {e.stdout}")
            return False
        except subprocess.TimeoutExpired:
            print(f"Playwright install chromium timed out after {install_timeout} seconds.")
            return False
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
    except Exception as e:
        # Top-level boundary: startup must not crash because of browser setup.
        print(f"An error occurred during Playwright browser setup: {e}")
        return False
45
+
46
# Probe for Chromium (installing it if necessary) once, at import time.
# Everything printed here shows up in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)
54
+
55
+ # --- Screenshot function ---
56
def _normalize_url(url: str) -> str:
    """Trim *url* and prepend ``https://`` when it has no http(s) scheme."""
    url = url.strip()
    # Case-insensitive so "HTTP://example.com" is not double-prefixed.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    return url


def _new_screenshot_path(url: str) -> str:
    """Return a fresh PNG path under ``screenshots/`` derived from *url*."""
    os.makedirs("screenshots", exist_ok=True)
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # Only the part after the scheme goes into the name; partition() keeps
    # working when the query string itself contains "://".
    without_scheme = url.partition("://")[2]
    sanitized_url_part = "".join(c if c.isalnum() else "_" for c in without_scheme)[:50]
    # The nanosecond suffix keeps two requests for the same URL within the
    # same second from overwriting each other's file.
    unique_suffix = time.time_ns() % 1_000_000_000
    return f"screenshots/screenshot_{sanitized_url_part}_{timestamp}_{unique_suffix:09d}.png"


def _describe_playwright_error(url: str, detail: str) -> str:
    """Map a Playwright error message to a user-facing status string."""
    if "net::ERR_NAME_NOT_RESOLVED" in detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    if "Timeout" in detail or "TIMED_OUT" in detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    # Chromium reports TLS problems as net::ERR_CERT_* / net::ERR_SSL_*.
    if "ERR_CERT" in detail or "ERR_SSL" in detail or "ssl_error" in detail.lower():
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {detail[:200]}..."


def take_web_screenshot(url: str):
    """Capture a 1280x720 viewport screenshot of *url* with headless Chromium.

    Args:
        url: Address typed by the user. Surrounding whitespace is ignored and
            ``https://`` is prepended when no http(s) scheme is present.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the saved PNG file on success
        and ``None`` on any failure; ``status`` is a human-readable message.
        Errors are reported through ``status`` and never raised.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    # Treat None and whitespace-only input the same as an empty box.
    if not url or not url.strip():
        return None, "Please enter a website URL."

    url = _normalize_url(url)

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
                )
                page = context.new_page()
                page.set_viewport_size({"width": 1280, "height": 720})

                print(f"Navigating to URL: {url}")
                # 'domcontentloaded' returns sooner than waiting for the
                # network to go idle; timeout is 60 seconds.
                page.goto(url, timeout=60000, wait_until="domcontentloaded")

                screenshot_path = _new_screenshot_path(url)
                print(f"Taking screenshot and saving to: {screenshot_path}")
                # full_page=False captures only the viewport.
                page.screenshot(path=screenshot_path, full_page=False)
            finally:
                # Close the browser even when navigation or capture fails.
                browser.close()

        print("Screenshot successful.")
        return screenshot_path, f"Screenshot of {url} captured successfully!"

    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # Never hand back a path on failure, even if a file was partly written.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        # Top-level boundary for the UI callback: report, don't crash.
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."
117
+
118
# --- Gradio Interface ---
# Static UI content is named first so the Interface call itself stays short.
_APP_DESCRIPTION = (
    "Enter a full website URL to capture a screenshot of its current view. "
    "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
    "Prefix with http:// or https:// for best results."
)

_EXAMPLE_URLS = [
    ["https://gradio.app"],
    ["huggingface.co/spaces"],
    ["en.wikipedia.org/wiki/Python_(programming_language)"],
]

# Centers the app and caps its width.
_APP_CSS = ".gradio-container {max-width: 960px !important; margin: auto !important;}"

iface = gr.Interface(
    fn=take_web_screenshot,
    title="Website Screenshot Taker 📸",
    description=_APP_DESCRIPTION,
    inputs=gr.Textbox(
        label="Website URL",
        placeholder="e.g., https://www.example.com or example.com",
    ),
    # Order matches the (path, status) tuple returned by take_web_screenshot.
    outputs=[
        gr.Image(type="filepath", label="Website Screenshot"),
        gr.Textbox(label="Status"),
    ],
    examples=_EXAMPLE_URLS,
    allow_flagging="never",
    css=_APP_CSS,
)

# --- Main launch ---
if __name__ == "__main__":
    print("Starting Gradio application...")
    iface.launch()