Spaces:

NihalGazi
/

Website-Screenshot

Building

App Files Files Community

Website-Screenshot / app.py

NihalGazi

Update app.py

64d52e2 verified about 2 months ago

raw

history blame contribute delete

6.9 kB

	import gradio as gr
	from playwright.sync_api import sync_playwright, Error as PlaywrightError
	import subprocess
	import os
	import time

	# --- Helper to ensure Playwright browser is installed ---
	# This function attempts to install the browser if not found.
	# It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
	def _chromium_launches():
	    """Start headless Chromium and close it again.

	    Returns nothing; any exception raised by Playwright while starting or
	    closing the browser propagates to the caller.
	    """
	    with sync_playwright() as p:
	        browser = p.chromium.launch(headless=True)
	        browser.close()


	def install_playwright_browser_if_needed():
	    """Make sure Playwright can launch Chromium, installing it if necessary.

	    The function first tries to launch headless Chromium. If that raises a
	    ``PlaywrightError`` it runs ``playwright install chromium`` in a
	    subprocess and then tries the launch once more.

	    Returns:
	        bool: ``True`` when Chromium launched (either immediately or after
	        installation), ``False`` when installation or verification failed.
	        Every outcome is reported with ``print``; no exception escapes.
	    """
	    import sys  # local import: only needed to locate the running interpreter

	    try:
	        try:
	            _chromium_launches()
	        except PlaywrightError:
	            print("Playwright Chromium browser not found or not executable. Attempting installation.")
	        else:
	            print("Playwright Chromium browser is available.")
	            return True

	        # Run the installer through the interpreter that is executing this
	        # app, so it does not depend on a `playwright` script being on PATH
	        # and targets the same environment the import above came from.
	        # Fall back to the bare command if the interpreter path is unknown.
	        if sys.executable:
	            launcher = [sys.executable, "-m", "playwright"]
	        else:
	            launcher = ["playwright"]

	        print("Attempting to install Playwright Chromium browser executable...")
	        try:
	            subprocess.run(
	                launcher + ["install", "chromium"],
	                check=True,
	                capture_output=True,
	                text=True,
	            )
	        except subprocess.CalledProcessError as e:
	            print(f"Playwright install chromium failed. STDERR: {e.stderr} STDOUT: {e.stdout}")
	            return False
	        except FileNotFoundError:
	            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
	            return False

	        print("Playwright Chromium executable installed successfully.")
	        # Verify again; a failure here is reported by the handler below.
	        _chromium_launches()
	        print("Playwright Chromium successfully verified after installation.")
	        return True
	    except Exception as e:
	        # Best-effort setup: report the problem instead of crashing start-up.
	        print(f"An error occurred during Playwright browser setup: {e}")
	        return False

	# Probe for (and, if needed, install) Chromium once when the module loads.
	# The printed outcome ends up in the Hugging Face Space "Logs" tab.
	print("Initializing Space: Checking/Installing Playwright browser...")
	BROWSER_READY = install_playwright_browser_if_needed()
	print(
	    "Browser is ready."
	    if BROWSER_READY
	    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
	)

	# --- Screenshot function ---
	def take_web_screenshot(url: str):
	    """Capture a 1280x720 viewport screenshot of a web page.

	    Args:
	        url: Address to capture. Surrounding whitespace is ignored and
	            ``https://`` is prepended when no ``http://``/``https://``
	            scheme (in any letter case) is present.

	    Returns:
	        tuple: ``(path, message)``. On success ``path`` is the PNG written
	        under ``screenshots/`` and ``message`` confirms the capture. On any
	        failure ``path`` is ``None`` and ``message`` describes the problem.
	        No exception is raised to the caller.
	    """
	    if not BROWSER_READY:
	        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

	    # Tolerate None and stray whitespace pasted around the address.
	    url = (url or "").strip()
	    if not url:
	        return None, "Please enter a website URL."

	    # Prepend https:// if no scheme is present, as Playwright requires it.
	    # Compare case-insensitively so "HTTP://host" is not double-prefixed.
	    if not url.lower().startswith(("http://", "https://")):
	        url = "https://" + url

	    # NOTE(review): the URL is user-supplied and fetched from this machine;
	    # confirm whether internal/private addresses need to be blocked.
	    try:
	        with sync_playwright() as p:
	            browser = p.chromium.launch(headless=True)
	            try:
	                context = browser.new_context(
	                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
	                )
	                page = context.new_page()

	                # Set a common viewport size
	                page.set_viewport_size({"width": 1280, "height": 720})

	                print(f"Navigating to URL: {url}")
	                # 60 second budget; return as soon as the DOM is parsed
	                # instead of waiting for every sub-resource.
	                page.goto(url, timeout=60000, wait_until="domcontentloaded")

	                # File name: sanitized host/path (max 50 chars) + timestamp.
	                os.makedirs("screenshots", exist_ok=True)
	                timestamp = time.strftime("%Y%m%d-%H%M%S")
	                sanitized_url_part = "".join(
	                    c if c.isalnum() else "_" for c in url.split("://")[-1]
	                )[:50]
	                screenshot_path = f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"

	                print(f"Taking screenshot and saving to: {screenshot_path}")
	                # full_page=False captures only the viewport.
	                page.screenshot(path=screenshot_path, full_page=False)
	            finally:
	                # Close the browser on failures too, not just on success.
	                browser.close()

	            print("Screenshot successful.")
	            return screenshot_path, f"Screenshot of {url} captured successfully!"

	    except PlaywrightError as e:
	        error_message_detail = str(e)
	        print(f"Playwright error: {error_message_detail}")
	        lowered_detail = error_message_detail.lower()
	        if "net::ERR_NAME_NOT_RESOLVED" in error_message_detail:
	            status_message = f"Error: The URL '{url}' could not be resolved. Please check the domain name."
	        elif "Timeout" in error_message_detail:
	            status_message = f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
	        elif any(marker in lowered_detail for marker in ("ssl_error", "err_ssl", "err_cert")):
	            # Chromium reports TLS problems as net::ERR_SSL_* / net::ERR_CERT_*.
	            status_message = f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
	        else:
	            status_message = f"Error: Could not take screenshot of '{url}'. Details: {error_message_detail[:200]}..."
	        # Never hand back a file path on failure.
	        return None, status_message
	    except Exception as e:
	        # UI boundary: turn anything unexpected into a status message.
	        print(f"An unexpected error occurred: {e}")
	        return None, f"An unexpected error occurred: {str(e)[:200]}..."

	# --- Gradio Interface ---
	# Static UI text, sample inputs and page styling, kept apart from the wiring.
	_DESCRIPTION = (
	    "Enter a full website URL to capture a screenshot of its current view. "
	    "The system will attempt to install Chromium if it's not already present (check Space logs for details). "
	    "Prefix with http:// or https:// for best results."
	)
	_EXAMPLE_URLS = [
	    ["https://gradio.app"],
	    ["huggingface.co/spaces"],
	    ["en.wikipedia.org/wiki/Python_(programming_language)"],
	]
	# Centers the app and caps its width.
	_PAGE_CSS = ".gradio-container {max-width: 960px !important; margin: auto !important;}"

	# One text input (the URL) mapped to two outputs: the image file and a status line.
	iface = gr.Interface(
	    fn=take_web_screenshot,
	    inputs=gr.Textbox(
	        label="Website URL",
	        placeholder="e.g., https://www.example.com or example.com",
	    ),
	    outputs=[
	        gr.Image(type="filepath", label="Website Screenshot"),
	        gr.Textbox(label="Status"),
	    ],
	    title="Website Screenshot Taker 📸",
	    description=_DESCRIPTION,
	    examples=_EXAMPLE_URLS,
	    allow_flagging="never",
	    css=_PAGE_CSS,
	)

	# --- Entry point: start the Gradio server only when run as a script ---
	if __name__ == "__main__":
	    print("Starting Gradio application...")
	    iface.launch()