# File size: 6,904 Bytes
# 64d52e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import gradio as gr
from playwright.sync_api import sync_playwright, Error as PlaywrightError
import subprocess
import os
import time

# --- Helper to ensure Playwright browser is installed ---
# This function attempts to install the browser if not found.
# It's best-effort for standard Gradio spaces; a Dockerfile is more robust.
def install_playwright_browser_if_needed():
    """Make sure Playwright can launch headless Chromium, installing it if needed.

    The function first launches (and immediately closes) headless Chromium.
    If that raises a Playwright error, it runs ``playwright install chromium``
    in a subprocess and launches the browser once more to verify the result.
    Progress and failures are reported with ``print``.

    Returns:
        bool: True if Chromium could be launched, either straight away or
        after installation. False if the install command failed, could not
        be started, or any other exception occurred; ordinary exceptions are
        caught rather than propagated.
    """
    import sys  # Local import: only needed to locate the running interpreter.

    try:
        # Probe: can Playwright already start Chromium?
        with sync_playwright() as p:
            try:
                p.chromium.launch(headless=True).close()
            except PlaywrightError:
                print("Playwright Chromium browser not found or not executable. Attempting installation.")
            else:
                print("Playwright Chromium browser is available.")
                return True

        # Run the installer through the current interpreter so it targets the
        # same environment as the imported `playwright` package instead of
        # whatever `playwright` executable happens to be first on PATH.
        if sys.executable:
            install_cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        else:
            # No interpreter path available; fall back to the CLI entry point.
            install_cmd = ["playwright", "install", "chromium"]

        print("Attempting to install Playwright Chromium browser executable...")
        try:
            subprocess.run(install_cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            print(f"Playwright install chromium failed. STDERR: {e.stderr} STDOUT: {e.stdout}")
            return False
        except FileNotFoundError:
            print("Playwright command not found. Ensure 'playwright' is in requirements.txt and installed.")
            return False
        print("Playwright Chromium executable installed successfully.")

        # Verify that the freshly installed browser really launches.
        with sync_playwright() as p:
            p.chromium.launch(headless=True).close()
        print("Playwright Chromium successfully verified after installation.")
        return True
    except Exception as e:
        # Best-effort setup: report the problem and let the app start anyway.
        print(f"An error occurred during Playwright browser setup: {e}")
        return False

# Check for the browser (installing it if necessary) once, when the app starts.
# The messages below appear in the Hugging Face Space "Logs" tab.
print("Initializing Space: Checking/Installing Playwright browser...")
BROWSER_READY = install_playwright_browser_if_needed()
print(
    "Browser is ready."
    if BROWSER_READY
    else "WARNING: Browser installation failed or could not be verified. Screenshot functionality may not work."
)

# --- Screenshot function ---
def take_web_screenshot(url: str):
    """Capture a viewport screenshot of ``url`` with headless Chromium.

    Args:
        url: Address of the page to capture. Surrounding whitespace is
            ignored, and ``https://`` is prepended when the value does not
            already start with ``http://`` or ``https://`` (any letter case).

    Returns:
        tuple: ``(screenshot_path, status_message)``. ``screenshot_path`` is
        the path of the PNG written under ``screenshots/`` on success and
        ``None`` on any failure; ``status_message`` is a human-readable
        description of the outcome. Errors are reported through the status
        message instead of being raised.
    """
    if not BROWSER_READY:
        return None, "Error: Playwright browser (Chromium) is not properly installed or configured. Cannot take screenshot."

    # Strip first so whitespace-only input counts as empty and padded input
    # does not turn into an invalid "https:// example.com".
    url = url.strip() if url else ""
    if not url:
        return None, "Please enter a website URL."

    # Prepend https:// if no scheme is present, as Playwright requires it.
    # URL schemes are case-insensitive, so compare in lower case.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
            )
            page = context.new_page()

            # Set a common viewport size
            page.set_viewport_size({"width": 1280, "height": 720})

            print(f"Navigating to URL: {url}")
            # Wait only for DOMContentLoaded, with a 60 second timeout.
            page.goto(url, timeout=60000, wait_until="domcontentloaded")

            os.makedirs("screenshots", exist_ok=True)  # Ensure screenshots directory exists
            screenshot_path = _screenshot_path_for(url)

            print(f"Taking screenshot and saving to: {screenshot_path}")
            page.screenshot(path=screenshot_path, full_page=False)  # Captures the viewport

            browser.close()
            print("Screenshot successful.")
            return screenshot_path, f"Screenshot of {url} captured successfully!"

    except PlaywrightError as e:
        error_message_detail = str(e)
        print(f"Playwright error: {error_message_detail}")
        # Never hand back a file path on error, only the explanation.
        return None, _describe_playwright_error(url, error_message_detail)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None, f"An unexpected error occurred: {str(e)[:200]}..."


def _screenshot_path_for(url):
    """Build a timestamped PNG path under ``screenshots/`` for ``url``."""
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # Drop only the leading scheme (first "://"), so a URL that embeds another
    # URL in its query still contributes its own host to the file name.
    without_scheme = url.split("://", 1)[-1]
    # Simple sanitization: anything non-alphanumeric becomes "_", max 50 chars.
    sanitized_url_part = "".join(c if c.isalnum() else "_" for c in without_scheme)[:50]
    return f"screenshots/screenshot_{sanitized_url_part}_{timestamp}.png"


def _describe_playwright_error(url, error_message_detail):
    """Translate a Playwright error text into a user-facing status message."""
    lowered = error_message_detail.lower()
    if "net::ERR_NAME_NOT_RESOLVED" in error_message_detail:
        return f"Error: The URL '{url}' could not be resolved. Please check the domain name."
    if "Timeout" in error_message_detail:
        return f"Error: Timeout while loading '{url}'. The page might be too slow, offline, or protected."
    # Chromium reports TLS problems as net::ERR_CERT_* / net::ERR_SSL_* codes;
    # "ssl_error" is kept so messages matched previously still match.
    if any(marker in lowered for marker in ("ssl_error", "err_ssl_", "err_cert_")):
        return f"Error: SSL certificate issue with '{url}'. The site might be insecure or misconfigured."
    return f"Error: Could not take screenshot of '{url}'. Details: {error_message_detail[:200]}..."

# --- Gradio Interface ---
# Input widget: a single text box for the address to capture.
_url_box = gr.Textbox(
    label="Website URL",
    placeholder="e.g., https://www.example.com or example.com"
)

# Output widgets, in the order take_web_screenshot returns its values:
# the screenshot file path first, then the status text.
_result_widgets = [
    gr.Image(type="filepath", label="Website Screenshot"),
    gr.Textbox(label="Status"),
]

# Introductory text passed as the interface description.
_blurb = "".join((
    "Enter a full website URL to capture a screenshot of its current view. ",
    "The system will attempt to install Chromium if it's not already present (check Space logs for details). ",
    "Prefix with http:// or https:// for best results.",
))

# One single-value example row per sample address.
_sample_rows = [
    [sample]
    for sample in (
        "https://gradio.app",
        "huggingface.co/spaces",
        "en.wikipedia.org/wiki/Python_(programming_language)",
    )
]

iface = gr.Interface(
    fn=take_web_screenshot,
    inputs=_url_box,
    outputs=_result_widgets,
    examples=_sample_rows,
    title="Website Screenshot Taker 📸",
    description=_blurb,
    # Optional: center the app and limit its width.
    css=".gradio-container {max-width: 960px !important; margin: auto !important;}",
    allow_flagging="never",
)

# --- Main launch ---
if __name__ == "__main__":
    # Runs only when this file is executed as a script, not when it is imported.
    print("Starting Gradio application...")
    # launch() is called without arguments, so Gradio's default settings apply.
    iface.launch()