# Spaces: Building
import gradio as gr | |
from playwright.sync_api import sync_playwright, Error as PlaywrightError | |
import subprocess | |
import os | |
import time | |
# --- Helper to ensure Playwright browser is installed ---
# Best-effort bootstrap for standard Gradio Spaces; a Dockerfile that ships
# the browser inside the image is the more robust option.
def install_playwright_browser_if_needed():
    """Check that headless Chromium launches, installing it if it does not.

    First tries to launch and close headless Chromium. If that raises a
    ``PlaywrightError``, runs ``playwright install chromium`` in a subprocess
    and then repeats the launch check. Progress and failures are reported
    with ``print``.

    Returns:
        True if a headless Chromium launch succeeded (either immediately or
        after installation); False if the install command failed, the
        ``playwright`` executable was not found, or any other exception was
        raised during setup.
    """

    def _probe_launch(playwright):
        # Launch and immediately close a headless Chromium; raises on failure.
        chromium = playwright.chromium.launch(headless=True)
        chromium.close()

    try:
        # Step 1: is a usable browser already present?
        with sync_playwright() as pw:
            try:
                _probe_launch(pw)
            except PlaywrightError:
                print("Playwright Chromium browser not found or not executable. Attempting installation.")
            else:
                print("Playwright Chromium browser is available.")
                return True

        # Step 2: download the browser into Playwright's managed location.
        # System-level dependencies are expected to come from `packages.txt`.
        print("Attempting to install Playwright Chromium browser executable...")
        install_cmd = ["playwright", "install", "chromium"]
        try:
            subprocess.run(install_cmd, check=True, capture_output=True, text=True)
            print("Playwright Chromium executable installed successfully.")

            # Step 3: confirm the freshly installed browser really launches.
            with sync_playwright() as pw:
                _probe_launch(pw)
            print("Playwright Chromium successfully verified after installation.")
            return True
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
        except subprocess.CalledProcessError as exc:
            print(f"Playwright install chromium failed. STDERR: {exc.stderr} STDOUT: {exc.stdout}")
            return False
    except Exception as exc:
        # Catch-all so a setup problem is reported instead of aborting startup.
        print(f"An error occurred during Playwright browser setup: {exc}")
        return False
# Perform the browser check/installation once, at import time.
# Output shows up in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)
# --- Screenshot function ---
_URL_SCHEMES = ("http://", "https://")


def _normalize_url(url):
    """Return *url* trimmed and with a scheme, or "" when it is blank/None."""
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    # Case-insensitive check so "HTTP://example.com" is not double-prefixed.
    # Anything without an http(s) scheme gets "https://" prepended.
    if not cleaned.lower().startswith(_URL_SCHEMES):
        cleaned = "https://" + cleaned
    return cleaned


def _describe_playwright_error(url, detail):
    """Map a Playwright error message to a user-facing status string."""
    if "net::ERR_NAME_NOT_RESOLVED" in detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    # "Timeout" covers Playwright's own timeout message; "TIMED_OUT" covers
    # Chromium network codes such as net::ERR_CONNECTION_TIMED_OUT.
    if "Timeout" in detail or "TIMED_OUT" in detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    upper_detail = detail.upper()
    # Chromium reports TLS problems as net::ERR_CERT_* / net::ERR_SSL_*;
    # "SSL_ERROR" is kept for messages that use that spelling.
    if any(marker in upper_detail for marker in ("SSL_ERROR", "ERR_SSL", "ERR_CERT")):
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {detail[:200]}..."


def take_web_screenshot(url: str):
    """Capture a 1280x720 viewport screenshot of *url* with headless Chromium.

    Args:
        url: Address typed by the user. Surrounding whitespace is ignored and
            "https://" is prepended when no http(s) scheme is present.

    Returns:
        A ``(path, status)`` tuple. On success ``path`` is the PNG file saved
        under ``screenshots/`` and ``status`` a confirmation message. On any
        failure ``path`` is ``None`` and ``status`` describes the problem.
        No exception is propagated to the caller.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    url = _normalize_url(url)
    if not url:
        return None, "Please enter a website URL."

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
                )
                page = context.new_page()
                # Set a common viewport size
                page.set_viewport_size({"width": 1280, "height": 720})

                print(f"Navigating to URL: {url}")
                # 'domcontentloaded' returns sooner than waiting for every
                # resource; 60 second timeout for slow sites.
                page.goto(url, timeout=60000, wait_until="domcontentloaded")

                # Build the output filename from the sanitized URL plus a timestamp.
                os.makedirs("screenshots", exist_ok=True)
                timestamp = time.strftime("%Y%m%d-%H%M%S")
                sanitized_url_part = "".join(
                    c if c.isalnum() else "_" for c in url.split("://")[-1]
                )[:50]
                screenshot_path = f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"

                print(f"Taking screenshot and saving to: {screenshot_path}")
                page.screenshot(path=screenshot_path, full_page=False)  # Captures the viewport
            finally:
                # Close the browser even when navigation or capture failed.
                browser.close()
        print("Screenshot successful.")
        return screenshot_path, f"Screenshot of {url} captured successfully!"
    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # Never hand back a file path on error, even if one was written.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."
# --- Gradio Interface ---
# Input: a single textbox for the address to capture.
_url_box = gr.Textbox(
    label="Website URL",
    placeholder="e.g., https://www.example.com or example.com"
)

# Outputs: the saved screenshot (passed as a file path) and a status line,
# matching the (path, status) tuple returned by take_web_screenshot.
_result_components = [
    gr.Image(type="filepath", label="Website Screenshot"),
    gr.Textbox(label="Status")
]

_description_text = (
    "Enter a full website URL to capture a screenshot of its current view. "
    "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
    "Prefix with http:// or https:// for best results."
)

# Clickable sample inputs shown with the form.
_example_rows = [
    ["https://gradio.app"],
    ["huggingface.co/spaces"],
    ["en.wikipedia.org/wiki/Python_(programming_language)"]
]

# Optional: center the app and limit its width.
_page_css = ".gradio-container {max-width: 960px !important; margin: auto !important;}"

iface = gr.Interface(
    fn=take_web_screenshot,
    inputs=_url_box,
    outputs=_result_components,
    title="Website Screenshot Taker 📸",
    description=_description_text,
    examples=_example_rows,
    allow_flagging="never",
    css=_page_css
)

# --- Main launch ---
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    print("Starting Gradio application...")
    iface.launch()