import os
import subprocess
import sys
import time

import gradio as gr
from playwright.sync_api import sync_playwright, Error as PlaywrightError


# --- Helper to ensure Playwright browser is installed ---
# This function attempts to install the browser if not found.
# It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
def _launch_chromium_once():
    """Launch headless Chromium and close it again.

    Used purely as a probe: any exception raised by Playwright while starting
    or closing the browser propagates to the caller.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        browser.close()


def install_playwright_browser_if_needed():
    """Make sure Playwright can launch Chromium, installing it if necessary.

    First tries to launch headless Chromium. If that raises a Playwright
    error, runs ``playwright install chromium`` and then tries the launch
    again to verify the installation.

    Returns:
        True if Chromium could be launched (before or after installation),
        False otherwise. Failures are printed rather than raised.
    """
    try:
        # Check if the browser is callable by Playwright.
        try:
            _launch_chromium_once()
        except PlaywrightError:
            print("Playwright Chromium browser not found or not executable. Attempting installation.")
        else:
            print("Playwright Chromium browser is available.")
            return True

        # If not found, try to install.
        # The `packages.txt` should have installed most system dependencies.
        # This command installs the browser itself into Playwright's managed location.
        print("Attempting to install Playwright Chromium browser executable...")
        try:
            # Run the CLI through the current interpreter so the install does
            # not depend on a `playwright` script being on PATH and always
            # targets the same environment as the imported package.
            subprocess.run(
                [sys.executable, "-m", "playwright", "install", "chromium"],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Playwright install chromium failed. STDERR: {e.stderr} STDOUT: {e.stdout}")
            return False
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
        print("Playwright Chromium executable installed successfully.")

        # Verify again; a failure here is reported by the outer handler.
        _launch_chromium_once()
        print("Playwright Chromium successfully verified after installation.")
        return True
    except Exception as e:
        # Startup must not crash the app: report the problem and let the
        # caller decide what to do with an unavailable browser.
        print(f"An error occurred during Playwright browser setup: {e}")
        return False


# Run browser installation check when the app starts.
# Logs will appear in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
if BROWSER_READY:
    print("Browser is ready.")
else:
    print("WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work.")


# --- Screenshot function ---
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
)
_VIEWPORT = {"width": 1280, "height": 720}  # A common viewport size
_NAVIGATION_TIMEOUT_MS = 60000  # 60 seconds
_SCREENSHOT_DIR = "screenshots"


def _normalize_url(url):
    """Return *url* trimmed, with ``https://`` prepended if it has no http(s) scheme.

    Returns an empty string for ``None``, empty, or whitespace-only input.
    The scheme test is case-insensitive so ``HTTP://host`` is left alone.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    if not cleaned.lower().startswith(("http://", "https://")):
        cleaned = "https://" + cleaned
    return cleaned


def _screenshot_path_for(url):
    """Build the output path for *url*: sanitized URL part plus a timestamp."""
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # Simple sanitization for filename: non-alphanumerics become "_",
    # and the URL part is capped at 50 characters.
    sanitized_url_part = "".join(c if c.isalnum() else "_" for c in url.split("://")[-1])[:50]
    return f"{_SCREENSHOT_DIR}/screenshot_{sanitized_url_part}_{timestamp}.png"


def _describe_playwright_error(url, detail):
    """Map a Playwright error message to the status text shown to the user."""
    if "net::ERR_NAME_NOT_RESOLVED" in detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    if "Timeout" in detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    # Chromium reports TLS failures as net::ERR_CERT_* / net::ERR_SSL_*;
    # "ssl_error" is kept for messages that use that spelling.
    if "ERR_CERT_" in detail or "ERR_SSL_" in detail or "ssl_error" in detail.lower():
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {detail[:200]}..."


def take_web_screenshot(url: str):
    """Capture a viewport screenshot of *url* with headless Chromium.

    Args:
        url: Website address. Surrounding whitespace is ignored and
            ``https://`` is prepended when no http(s) scheme is present,
            as Playwright requires one.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the saved PNG file path on
        success and ``None`` on any failure; ``status`` is a human-readable
        message in both cases. Errors are reported through ``status`` rather
        than raised.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    url = _normalize_url(url)
    if not url:
        return None, "Please enter a website URL."

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(user_agent=_USER_AGENT)
                page = context.new_page()
                page.set_viewport_size(_VIEWPORT)

                print(f"Navigating to URL: {url}")
                # 'domcontentloaded' is used because 'networkidle' can be slow/unreliable.
                page.goto(url, timeout=_NAVIGATION_TIMEOUT_MS, wait_until="domcontentloaded")

                os.makedirs(_SCREENSHOT_DIR, exist_ok=True)  # Ensure screenshots directory exists
                screenshot_path = _screenshot_path_for(url)

                print(f"Taking screenshot and saving to: {screenshot_path}")
                page.screenshot(path=screenshot_path, full_page=False)  # Captures the viewport
            finally:
                # Close the browser even when navigation or capture fails.
                browser.close()
            print("Screenshot successful.")
            return screenshot_path, f"Screenshot of {url} captured successfully!"
    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # If a screenshot was partially created or an old one exists, don't return it on error.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."


# --- Gradio Interface ---
iface = gr.Interface(
    fn=take_web_screenshot,
    inputs=gr.Textbox(
        label="Website URL",
        placeholder="e.g., https://www.example.com or example.com",
    ),
    outputs=[
        gr.Image(type="filepath", label="Website Screenshot"),
        gr.Textbox(label="Status"),
    ],
    title="Website Screenshot Taker 📸",
    description=(
        "Enter a full website URL to capture a screenshot of its current view. "
        "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
        "Prefix with http:// or https:// for best results."
    ),
    examples=[
        ["https://gradio.app"],
        ["huggingface.co/spaces"],
        ["en.wikipedia.org/wiki/Python_(programming_language)"],
    ],
    allow_flagging="never",
    # Optional: center and limit width
    css=".gradio-container {max-width: 960px !important; margin: auto !important;}",
)

# --- Main launch ---
if __name__ == "__main__":
    print("Starting Gradio application...")
    iface.launch()