# Source: Hugging Face Space file view (Space status: Building).
# File size: 6,904 bytes; revision 64d52e2.
import gradio as gr
from playwright.sync_api import sync_playwright, Error as PlaywrightError
import subprocess
import os
import time
# --- Helper to ensure Playwright browser is installed ---
# Best-effort bootstrap for standard Gradio spaces: probe Chromium and, if the
# probe fails, download it via the Playwright CLI. A Dockerfile is more robust.
def install_playwright_browser_if_needed():
    """Make sure Playwright can launch headless Chromium, installing it if needed.

    The function first tries to launch and close a headless Chromium. If that
    launch raises a Playwright error, it runs ``playwright install chromium``
    in a subprocess and then repeats the launch to verify the installation.
    Progress and failures are reported with ``print``.

    Returns:
        True if Chromium launched successfully (before or after installing),
        False if the install command failed, the ``playwright`` command was
        not found, or any other exception occurred.
    """
    install_command = ["playwright", "install", "chromium"]
    try:
        # First probe: a launch/close round trip proves the browser is usable.
        with sync_playwright() as playwright:
            try:
                playwright.chromium.launch(headless=True).close()
            except PlaywrightError:
                print("Playwright Chromium browser not found or not executable. Attempting installation.")
            else:
                print("Playwright Chromium browser is available.")
                return True

        # The probe failed, so fetch the browser binary into Playwright's
        # managed location. System libraries are expected to come from
        # `packages.txt`; only the browser executable is installed here.
        print("Attempting to install Playwright Chromium browser executable...")
        try:
            subprocess.run(install_command, check=True, capture_output=True, text=True)
            print("Playwright Chromium executable installed successfully.")
            # Second probe: confirm the freshly installed browser really starts.
            with sync_playwright() as playwright:
                playwright.chromium.launch(headless=True).close()
            print("Playwright Chromium successfully verified after installation.")
            return True
        except subprocess.CalledProcessError as install_failure:
            print(f"Playwright install chromium failed. STDERR: {install_failure.stderr} STDOUT: {install_failure.stdout}")
            return False
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
    except Exception as setup_error:
        # Catch-all boundary: startup must never crash because of browser setup.
        print(f"An error occurred during Playwright browser setup: {setup_error}")
        return False
# Probe (and, if necessary, install) the browser once, at import time.
# The printed messages end up in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)
# --- Screenshot function ---
def _normalize_url(url):
    """Strip surrounding whitespace and prepend ``https://`` when no http(s) scheme is present."""
    url = url.strip()
    # Case-insensitive check so "HTTP://example.com" is not double-prefixed.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    return url


def _screenshot_path_for(url):
    """Create the ``screenshots`` directory if needed and return a timestamped PNG path for *url*."""
    os.makedirs("screenshots", exist_ok=True)
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # Split only on the first "://" so a URL embedded in the query string
    # does not replace the real host in the file name.
    host_and_path = url.split("://", 1)[-1]
    sanitized_url_part = "".join(c if c.isalnum() else "_" for c in host_and_path)[:50]
    return f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"


def _describe_playwright_error(url, detail):
    """Translate a Playwright error message into the status text shown to the user."""
    if "net::ERR_NAME_NOT_RESOLVED" in detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    # Playwright's own timeouts say "Timeout ...ms exceeded"; Chromium network
    # timeouts surface as net::ERR_TIMED_OUT / net::ERR_CONNECTION_TIMED_OUT.
    if "Timeout" in detail or "TIMED_OUT" in detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    # Chromium reports TLS problems as net::ERR_CERT_* or net::ERR_SSL_*;
    # "SSL_ERROR" is kept for messages from other engines.
    upper_detail = detail.upper()
    if any(marker in upper_detail for marker in ("SSL_ERROR", "ERR_CERT", "ERR_SSL")):
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {detail[:200]}..."


def take_web_screenshot(url: str):
    """Capture a 1280x720 viewport screenshot of *url* with headless Chromium.

    Args:
        url: Address typed by the user. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http://``/``https://`` scheme
            is given (the check is case-insensitive).

    Returns:
        A ``(path, status)`` tuple. On success ``path`` is the PNG file saved
        under ``screenshots/`` and ``status`` is a confirmation message. On
        any failure ``path`` is ``None`` and ``status`` describes the problem.
        Errors are reported through the status string and not raised.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."
    # Treat whitespace-only input the same as an empty field.
    if not url or not url.strip():
        return None, "Please enter a website URL."

    url = _normalize_url(url)

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
                )
                page = context.new_page()
                # Set a common viewport size
                page.set_viewport_size({"width": 1280, "height": 720})

                print(f"Navigating to URL: {url}")
                # 60 second budget; 'domcontentloaded' avoids waiting on slow
                # trailing resources the way 'networkidle' would.
                page.goto(url, timeout=60000, wait_until="domcontentloaded")

                screenshot_path = _screenshot_path_for(url)
                print(f"Taking screenshot and saving to: {screenshot_path}")
                page.screenshot(path=screenshot_path, full_page=False)  # Captures the viewport
            finally:
                # Close the browser on failure paths too, not only on success.
                browser.close()

        print("Screenshot successful.")
        return screenshot_path, f"Screenshot of {url} captured successfully!"
    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # Never hand back a partially written or stale image on error.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        # Top-level boundary for the UI callback: report, do not crash the app.
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."
# --- Gradio Interface ---
# UI copy and components are prepared first so the Interface call below is
# pure wiring: one URL textbox in, an image plus a status textbox out.
_url_input = gr.Textbox(
    label="Website URL",
    placeholder="e.g., https://www.example.com or example.com"
)
_screenshot_output = gr.Image(type="filepath", label="Website Screenshot")
_status_output = gr.Textbox(label="Status")

_APP_DESCRIPTION = (
    "Enter a full website URL to capture a screenshot of its current view. "
    "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
    "Prefix with http:// or https:// for best results."
)
_EXAMPLE_URLS = [
    ["https://gradio.app"],
    ["huggingface.co/spaces"],
    ["en.wikipedia.org/wiki/Python_(programming_language)"],
]
# Optional: center the app and limit its width.
_CONTAINER_CSS = ".gradio-container {max-width: 960px !important; margin: auto !important;}"

iface = gr.Interface(
    fn=take_web_screenshot,
    inputs=_url_input,
    outputs=[_screenshot_output, _status_output],
    title="Website Screenshot Taker 📸",
    description=_APP_DESCRIPTION,
    examples=_EXAMPLE_URLS,
    allow_flagging="never",
    css=_CONTAINER_CSS,
)
# --- Main launch ---
# Start the Gradio server only when this file is executed as a script;
# importing the module builds `iface` but does not launch it.
if __name__ == "__main__":
    print("Starting Gradio application...")
    iface.launch()
|