Add webpage screenshot capture with Playwright

Implements automated screenshot capture for all discovered HTTP/HTTPS services using Playwright with headless Chromium. Screenshots are saved as PNG files and referenced in JSON reports.

Features:
- Separate ScreenshotCapture module for code organization
- Viewport screenshots (1280x720) with 15-second timeout
- Graceful handling of self-signed certificates
- Browser reuse for optimal performance
- Screenshots stored in timestamped directories
- Comprehensive documentation in README.md and new CLAUDE.md

Technical changes:
- Added src/screenshot_capture.py: Screenshot capture module with context manager pattern
- Updated src/scanner.py: Integrated screenshot capture into HTTP/HTTPS analysis phase
- Updated Dockerfile: Added Chromium and Playwright browser installation
- Updated requirements.txt: Added playwright==1.40.0
- Added CLAUDE.md: Developer documentation and implementation guide
- Updated README.md: Enhanced features section, added screenshot details and troubleshooting
- Updated .gitignore: Ignore entire output/ directory including screenshots

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-14 00:57:36 +00:00
parent 48755a8539
commit 61cc24f8d2
7 changed files with 822 additions and 25 deletions
--- a/src/scanner.py
+++ b/src/scanner.py
@@ -5,6 +5,7 @@ SneakyScanner - Masscan-based network scanner with YAML configuration

 import argparse
 import json
+import logging
 import subprocess
 import sys
 import tempfile
@@ -18,6 +19,8 @@ import yaml
 from libnmap.process import NmapProcess
 from libnmap.parser import NmapParser

+from screenshot_capture import ScreenshotCapture
+
 # Force unbuffered output for Docker
 sys.stdout.reconfigure(line_buffering=True)
 sys.stderr.reconfigure(line_buffering=True)
@@ -31,6 +34,7 @@ class SneakyScanner:
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.config = self._load_config()
+        self.screenshot_capture = None

    def _load_config(self) -> Dict[str, Any]:
        """Load and validate YAML configuration"""
@@ -511,6 +515,16 @@ class SneakyScanner:

                result = {'protocol': protocol}

+                # Capture screenshot if screenshot capture is enabled
+                if self.screenshot_capture:
+                    try:
+                        screenshot_path = self.screenshot_capture.capture(ip, port, protocol)
+                        if screenshot_path:
+                            result['screenshot'] = screenshot_path
+                    except Exception as e:
+                        print(f"  Screenshot capture error for {ip}:{port}: {e}",
+                              file=sys.stderr, flush=True)
+
                # If HTTPS, analyze SSL/TLS
                if protocol == 'https':
                    try:
@@ -545,6 +559,14 @@ class SneakyScanner:

        # Record start time
        start_time = time.time()
+        scan_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+
+        # Initialize screenshot capture
+        self.screenshot_capture = ScreenshotCapture(
+            output_dir=str(self.output_dir),
+            scan_timestamp=scan_timestamp,
+            timeout=15
+        )

        # Collect all unique IPs
        all_ips = set()
@@ -658,6 +680,10 @@ class SneakyScanner:

            report['sites'].append(site_result)

+        # Clean up screenshot capture browser
+        if self.screenshot_capture:
+            self.screenshot_capture._close_browser()
+
        return report

    def save_report(self, report: Dict[str, Any]) -> Path:
@@ -673,6 +699,13 @@ class SneakyScanner:


 def main():
+    # Configure logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        handlers=[logging.StreamHandler(sys.stderr)]
+    )
+
    parser = argparse.ArgumentParser(
        description='SneakyScanner - Masscan-based network scanner'
    )
--- a/src/screenshot_capture.py
+++ b/src/screenshot_capture.py
@@ -0,0 +1,201 @@
+"""
+Screenshot capture module for SneakyScanner.
+
+Uses Playwright with Chromium to capture screenshots of discovered web services.
+"""
+
+import os
+import logging
+from pathlib import Path
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
+
+
+class ScreenshotCapture:
+    """
+    Handles webpage screenshot capture for web services discovered during scanning.
+
+    Uses Playwright with Chromium in headless mode to capture viewport screenshots
+    of HTTP and HTTPS services. Handles SSL certificate errors gracefully.
+    """
+
+    def __init__(self, output_dir, scan_timestamp, timeout=15, viewport=None):
+        """
+        Initialize the screenshot capture handler.
+
+        Args:
+            output_dir (str): Base output directory for scan reports
+            scan_timestamp (str): Timestamp string for this scan (format: YYYYMMDD_HHMMSS)
+            timeout (int): Timeout in seconds for page load and screenshot (default: 15)
+            viewport (dict): Viewport size dict with 'width' and 'height' keys
+                           (default: {'width': 1280, 'height': 720})
+        """
+        self.output_dir = output_dir
+        self.scan_timestamp = scan_timestamp
+        self.timeout = timeout * 1000  # Convert to milliseconds for Playwright
+        self.viewport = viewport or {'width': 1280, 'height': 720}
+
+        self.playwright = None
+        self.browser = None
+        self.screenshot_dir = None
+
+        # Set up logging
+        self.logger = logging.getLogger('SneakyScanner.Screenshot')
+
+    def _get_screenshot_dir(self):
+        """
+        Create and return the screenshots subdirectory for this scan.
+
+        Returns:
+            Path: Path object for the screenshots directory
+        """
+        if self.screenshot_dir is None:
+            dir_name = f"scan_report_{self.scan_timestamp}_screenshots"
+            self.screenshot_dir = Path(self.output_dir) / dir_name
+            self.screenshot_dir.mkdir(parents=True, exist_ok=True)
+            self.logger.info(f"Created screenshot directory: {self.screenshot_dir}")
+
+        return self.screenshot_dir
+
+    def _generate_filename(self, ip, port):
+        """
+        Generate a filename for the screenshot.
+
+        Args:
+            ip (str): IP address of the service
+            port (int): Port number of the service
+
+        Returns:
+            str: Filename in format: {ip}_{port}.png
+        """
+        # Replace IPv4 dots and IPv6 colons (':' is not a portable filename character)
+        safe_ip = ip.replace('.', '_').replace(':', '_')
+        return f"{safe_ip}_{port}.png"
+
+    def _launch_browser(self):
+        """
+        Launch Playwright and Chromium browser in headless mode.
+
+        Returns:
+            bool: True if browser launched successfully, False otherwise
+        """
+        if self.browser is not None:
+            return True  # Already launched
+
+        try:
+            self.logger.info("Launching Chromium browser...")
+            self.playwright = sync_playwright().start()
+            self.browser = self.playwright.chromium.launch(
+                headless=True,
+                args=[
+                    '--no-sandbox',
+                    '--disable-setuid-sandbox',
+                    '--disable-dev-shm-usage',
+                    '--disable-gpu',
+                ]
+            )
+            self.logger.info("Chromium browser launched successfully")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Failed to launch browser: {e}")
+            # Stop a half-started Playwright driver so a later retry starts clean
+            self._close_browser()
+            return False
+
+    def _close_browser(self):
+        """
+        Close the browser and cleanup Playwright resources.
+        """
+        if self.browser:
+            try:
+                self.browser.close()
+                self.logger.info("Browser closed")
+            except Exception as e:
+                self.logger.warning(f"Error closing browser: {e}")
+            finally:
+                self.browser = None
+
+        if self.playwright:
+            try:
+                self.playwright.stop()
+            except Exception as e:
+                self.logger.warning(f"Error stopping playwright: {e}")
+            finally:
+                self.playwright = None
+
+    def capture(self, ip, port, protocol):
+        """
+        Capture a screenshot of a web service.
+
+        Args:
+            ip (str): IP address of the service
+            port (int): Port number of the service
+            protocol (str): Protocol to use ('http' or 'https')
+
+        Returns:
+            str: Relative path to the screenshot file, or None if capture failed
+        """
+        # Validate protocol
+        if protocol not in ['http', 'https']:
+            self.logger.warning(f"Invalid protocol '{protocol}' for {ip}:{port}")
+            return None
+
+        # Launch browser if not already running
+        if not self._launch_browser():
+            return None
+
+        # Build URL (an IPv6 literal must be wrapped in brackets)
+        host = f"[{ip}]" if ':' in ip else ip
+        url = f"{protocol}://{host}:{port}"
+
+        # Generate screenshot filename
+        filename = self._generate_filename(ip, port)
+        screenshot_dir = self._get_screenshot_dir()
+        screenshot_path = screenshot_dir / filename
+
+        context = None
+        try:
+            self.logger.info(f"Capturing screenshot: {url}")
+
+            # Create new browser context with viewport and SSL settings
+            context = self.browser.new_context(
+                viewport=self.viewport,
+                ignore_https_errors=True,  # Handle self-signed certs
+                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+            )
+            page = context.new_page()
+            page.set_default_timeout(self.timeout)
+
+            # Navigate to URL, then take the screenshot (viewport only)
+            page.goto(url, wait_until='networkidle', timeout=self.timeout)
+            page.screenshot(path=str(screenshot_path), type='png')
+            self.logger.info(f"Screenshot saved: {screenshot_path}")
+
+            # Return relative path (relative to output directory)
+            return f"{screenshot_dir.name}/{filename}"
+
+        except PlaywrightTimeout:
+            self.logger.warning(f"Timeout capturing screenshot for {url}")
+            return None
+
+        except Exception as e:
+            self.logger.warning(f"Failed to capture screenshot for {url}: {e}")
+            return None
+
+        finally:
+            # Always close the context (and its pages) so failed captures do not leak them
+            if context is not None:
+                try:
+                    context.close()
+                except Exception as e:
+                    self.logger.warning(f"Error closing browser context: {e}")
+
+    def __enter__(self):
+        """Context manager entry."""
+        self._launch_browser()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit - cleanup browser resources."""
+        self._close_browser()
+        return False  # Don't suppress exceptions