# Author: NihalGazi
# Commit: "Update app.py"
# Revision: 64d52e2 (verified)
import gradio as gr
from playwright.sync_api import sync_playwright, Error as PlaywrightError
import subprocess
import os
import time
# --- Helper to ensure Playwright browser is installed ---
# This function attempts to install the browser if not found.
# It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
def install_playwright_browser_if_needed():
    """Make sure Playwright can launch headless Chromium, installing it if not.

    A headless Chromium launch is attempted first. If that raises a
    ``PlaywrightError``, ``playwright install chromium`` is run as a
    subprocess and the launch is attempted once more.

    Returns:
        True when a headless Chromium launch succeeded (either immediately or
        after installation); False when the install command failed, the
        ``playwright`` executable was not found, or any other exception
        occurred during setup. Progress and failures are reported via print.
    """
    install_cmd = ["playwright", "install", "chromium"]
    try:
        # First probe: can Playwright already start (and stop) Chromium?
        with sync_playwright() as pw:
            try:
                pw.chromium.launch(headless=True).close()
            except PlaywrightError:
                # Fall through to the installation attempt below.
                print("Playwright Chromium browser not found or not executable. Attempting installation.")
            else:
                print("Playwright Chromium browser is available.")
                return True

        # System-level dependencies are expected to come from `packages.txt`;
        # this step only fetches the browser binary that Playwright manages.
        print("Attempting to install Playwright Chromium browser executable...")
        try:
            # check=True turns a non-zero exit status into CalledProcessError.
            subprocess.run(install_cmd, check=True, capture_output=True, text=True)
            print("Playwright Chromium executable installed successfully.")
            # Second probe: confirm the freshly installed browser launches.
            with sync_playwright() as pw:
                pw.chromium.launch(headless=True).close()
                print("Playwright Chromium successfully verified after installation.")
                return True
        except subprocess.CalledProcessError as install_err:
            print(f"Playwright install chromium failed. STDERR: {install_err.stderr} STDOUT: {install_err.stdout}")
            return False
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
    except Exception as setup_err:
        # Best-effort setup: report the problem and let the app start anyway.
        print(f"An error occurred during Playwright browser setup: {setup_err}")
        return False
# Probe (and, if needed, install) the browser once while the module loads;
# the result is kept in BROWSER_READY for later use.
# The printed output shows up in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)
# --- Screenshot function ---
def _normalize_url(url):
    """Return *url* stripped of surrounding whitespace and carrying a scheme.

    Returns an empty string when *url* is None, empty or whitespace-only.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    # URL schemes are case-insensitive, so "HTTPS://x" must not get a second
    # "https://" prepended.
    if not cleaned.lower().startswith(("http://", "https://")):
        cleaned = "https://" + cleaned
    return cleaned


def _screenshot_path_for(url):
    """Create the ``screenshots`` directory and return a PNG path for *url*."""
    os.makedirs("screenshots", exist_ok=True)
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # Drop only the leading scheme (maxsplit=1) so a URL that embeds another
    # URL in its query string still contributes its own host to the filename.
    without_scheme = url.split("://", 1)[-1]
    sanitized = "".join(c if c.isalnum() else "_" for c in without_scheme)[:50]
    return f"screenshots/screenshot_{sanitized}_{timestamp}.png"


def _describe_playwright_error(url, detail):
    """Translate a Playwright error message into a user-facing status string."""
    if "net::ERR_NAME_NOT_RESOLVED" in detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    # "Timeout" covers Playwright's own timeout message; "TIMED_OUT" covers
    # Chromium network errors such as net::ERR_CONNECTION_TIMED_OUT.
    if "Timeout" in detail or "TIMED_OUT" in detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    # Chromium reports TLS problems as net::ERR_CERT_* / net::ERR_SSL_*, which
    # the original "ssl_error" substring check never matched.
    if "ssl_error" in detail.lower() or "ERR_SSL_" in detail or "ERR_CERT_" in detail:
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {detail[:200]}..."


def take_web_screenshot(url: str):
    """Capture a 1280x720 viewport screenshot of *url* with headless Chromium.

    Args:
        url: Address to capture. Surrounding whitespace is ignored and
            ``https://`` is prepended when no http/https scheme is present.

    Returns:
        A ``(path, status)`` tuple. On success ``path`` is the saved PNG under
        ``screenshots/`` and ``status`` a confirmation message. On any failure
        ``path`` is None and ``status`` describes the problem; exceptions are
        reported through ``status`` rather than raised.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    url = _normalize_url(url)
    if not url:
        return None, "Please enter a website URL."

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
                )
                page = context.new_page()
                page.set_viewport_size({"width": 1280, "height": 720})
                print(f"Navigating to URL: {url}")
                # Wait only for DOMContentLoaded, with a 60 second ceiling.
                page.goto(url, timeout=60000, wait_until="domcontentloaded")
                screenshot_path = _screenshot_path_for(url)
                print(f"Taking screenshot and saving to: {screenshot_path}")
                # full_page=False captures just the viewport.
                page.screenshot(path=screenshot_path, full_page=False)
            finally:
                # Close the browser on failure paths too, not only on success.
                browser.close()
        print("Screenshot successful.")
        return screenshot_path, f"Screenshot of {url} captured successfully!"
    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # Never hand back a path on error, even if a file was partly written.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        # UI boundary: surface unexpected failures as a status message.
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."
# --- Gradio Interface ---
# Constant pieces of the UI are named up front so the Interface call stays short.
_DESCRIPTION = " ".join(
    [
        "Enter a full website URL to capture a screenshot of its current view.",
        "The system will attempt to install Chromium if it's not already present (check Space logs for details).",
        "Prefix with http:// or https:// for best results.",
    ]
)
_EXAMPLE_URLS = [
    ["https://gradio.app"],
    ["huggingface.co/spaces"],
    ["en.wikipedia.org/wiki/Python_(programming_language)"],
]
# Optional styling: centre the app and cap its width.
_CONTAINER_CSS = ".gradio-container {max-width: 960px !important; margin: auto !important;}"

# One text input (the URL) feeds take_web_screenshot; its two return values
# fill the image and the status textbox, in that order.
iface = gr.Interface(
    fn=take_web_screenshot,
    inputs=gr.Textbox(
        label="Website URL",
        placeholder="e.g., https://www.example.com or example.com",
    ),
    outputs=[
        gr.Image(type="filepath", label="Website Screenshot"),
        gr.Textbox(label="Status"),
    ],
    title="Website Screenshot Taker 📸",
    description=_DESCRIPTION,
    examples=_EXAMPLE_URLS,
    allow_flagging="never",
    css=_CONTAINER_CSS,
)

# --- Main launch ---
# Start the server only when this file is executed directly.
if __name__ == "__main__":
    print("Starting Gradio application...")
    iface.launch()