# Spaces: NihalGazi / Website-Screenshot
# Building
# File size: 6,904 Bytes
# 64d52e2

import gradio as gr
from playwright.sync_api import sync_playwright, Error as PlaywrightError
import subprocess
import os
import time

# --- Helper to ensure Playwright browser is installed ---
# This function attempts to install the browser if not found.
# It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
def install_playwright_browser_if_needed():
    """Make sure Playwright can launch Chromium, installing it when it cannot.

    A headless launch is attempted first. If that raises a Playwright error,
    ``playwright install chromium`` is run as a subprocess and the launch is
    attempted once more to verify the result. Progress is reported with
    ``print``.

    Returns:
        bool: True when a headless Chromium launch succeeded (immediately or
        after installation). False when the install command exited non-zero,
        the ``playwright`` command could not be found, or any other exception
        was raised during setup.
    """
    install_command = ["playwright", "install", "chromium"]

    try:
        # Step 1: probe whether a usable browser is already present.
        with sync_playwright() as playwright:
            try:
                playwright.chromium.launch(headless=True).close()
            except PlaywrightError:
                print("Playwright Chromium browser not found or not executable. Attempting installation.")
            else:
                print("Playwright Chromium browser is available.")
                return True

        # Step 2: the probe failed, so download the browser executable into
        # Playwright's managed location and then re-run the probe.
        print("Attempting to install Playwright Chromium browser executable...")
        try:
            subprocess.run(install_command, check=True, capture_output=True, text=True)
            print("Playwright Chromium executable installed successfully.")
            with sync_playwright() as playwright:
                playwright.chromium.launch(headless=True).close()
            print("Playwright Chromium successfully verified after installation.")
            return True
        except FileNotFoundError:
            # The `playwright` executable itself is missing.
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
        except subprocess.CalledProcessError as install_error:
            # The installer ran but exited with a non-zero status.
            print(f"Playwright install chromium failed. STDERR: {install_error.stderr} STDOUT: {install_error.stdout}")
            return False
    except Exception as setup_error:
        # Best-effort setup: report anything unexpected instead of crashing.
        print(f"An error occurred during Playwright browser setup: {setup_error}")
        return False

# Perform the browser check/installation once, at import time.
# Everything printed here shows up in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)

# --- Screenshot function ---
def take_web_screenshot(url: str):
    """Capture a viewport screenshot of ``url`` with headless Chromium.

    Args:
        url: Address entered by the user. Surrounding whitespace is ignored,
            and ``https://`` is prepended when the value does not already
            start with ``http://`` or ``https://`` (checked
            case-insensitively).

    Returns:
        tuple: ``(screenshot_path, status_message)``. ``screenshot_path`` is
        the path of the PNG written under ``screenshots/`` on success and
        ``None`` on any failure. ``status_message`` is always a
        human-readable string describing the outcome.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    # Treat None and whitespace-only input the same as an empty textbox.
    url = (url or "").strip()
    if not url:
        return None, "Please enter a website URL."

    # Prepend https:// if no scheme is present, as Playwright requires it.
    # The comparison is case-insensitive so "HTTP://site" is left untouched.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
                )
                page = context.new_page()

                # Set a common viewport size
                page.set_viewport_size({"width": 1280, "height": 720})

                print(f"Navigating to URL: {url}")
                # Wait only for DOMContentLoaded, with a 60 second ceiling.
                page.goto(url, timeout=60000, wait_until="domcontentloaded")

                # Build the output path: screenshots/screenshot_<host+path>_<time>.png
                os.makedirs("screenshots", exist_ok=True)
                timestamp = time.strftime("%Y%m%d-%H%M%S")
                # Simple sanitization for filename: non-alphanumerics become "_".
                sanitized_url_part = "".join(
                    c if c.isalnum() else "_" for c in url.split("://")[-1]
                )[:50]
                screenshot_path = f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"

                print(f"Taking screenshot and saving to: {screenshot_path}")
                page.screenshot(path=screenshot_path, full_page=False)  # Captures the viewport
            finally:
                # Close the browser on the failure paths too, not only on success.
                browser.close()

        print("Screenshot successful.")
        return screenshot_path, f"Screenshot of {url} captured successfully!"

    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        if "net::ERR_NAME_NOT_RESOLVED" in error_message_detail:
            status_message = f"Error: The URL '{url}' could not be resolved. Please check the domain name."
        elif "Timeout" in error_message_detail:
            status_message = f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
        elif (
            # Chromium reports certificate/TLS failures as net::ERR_CERT_* and
            # net::ERR_SSL_* codes; the legacy "ssl_error" match is kept too.
            "ERR_CERT_" in error_message_detail
            or "ERR_SSL_" in error_message_detail
            or "ssl_error" in error_message_detail.lower()
        ):
            status_message = f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
        else:
            status_message = f"Error: Could not take screenshot of '{url}'. Details: {error_message_detail[:200]}..."
        # Never hand back a path on error, even if a file was partially written.
        return None, status_message
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."

# --- Gradio Interface ---
# Single text input holding the address to capture.
_url_input = gr.Textbox(
    label="Website URL",
    placeholder="e.g., https://www.example.com or example.com",
)

# Two outputs, matching the (screenshot_path, status_message) pair returned by
# take_web_screenshot.
_result_outputs = [
    gr.Image(type="filepath", label="Website Screenshot"),
    gr.Textbox(label="Status"),
]

_description = (
    "Enter a full website URL to capture a screenshot of its current view. "
    "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
    "Prefix with http:// or https:// for best results."
)

# Clickable sample inputs, with and without an explicit scheme.
_examples = [
    ["https://gradio.app"],
    ["huggingface.co/spaces"],
    ["en.wikipedia.org/wiki/Python_(programming_language)"],
]

# Optional: center the app and limit its width.
_page_css = ".gradio-container {max-width: 960px !important; margin: auto !important;}"

iface = gr.Interface(
    fn=take_web_screenshot,
    inputs=_url_input,
    outputs=_result_outputs,
    title="Website Screenshot Taker 📸",
    description=_description,
    examples=_examples,
    allow_flagging="never",
    css=_page_css,
)

# --- Main launch ---
def main():
    """Announce startup on stdout and launch the Gradio interface."""
    print("Starting Gradio application...")
    iface.launch()


if __name__ == "__main__":
    main()