Add webpage screenshot capture with Playwright

Implements automated screenshot capture for all discovered HTTP/HTTPS services using Playwright with headless Chromium. Screenshots are saved as PNG files and referenced in JSON reports.

Features:
- Separate ScreenshotCapture module for code organization
- Viewport screenshots (1280x720) with 15-second timeout
- Graceful handling of self-signed certificates
- Browser reuse for optimal performance
- Screenshots stored in timestamped directories
- Comprehensive documentation in README.md and new CLAUDE.md

Technical changes:
- Added src/screenshot_capture.py: Screenshot capture module with context manager pattern
- Updated src/scanner.py: Integrated screenshot capture into HTTP/HTTPS analysis phase
- Updated Dockerfile: Added Chromium and Playwright browser installation
- Updated requirements.txt: Added playwright==1.40.0
- Added CLAUDE.md: Developer documentation and implementation guide
- Updated README.md: Enhanced features section, added screenshot details and troubleshooting
- Updated .gitignore: Ignore entire output/ directory including screenshots

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-14 00:57:36 +00:00
parent 48755a8539
commit 61cc24f8d2
7 changed files with 822 additions and 25 deletions

View File

@@ -5,6 +5,7 @@ SneakyScanner - Masscan-based network scanner with YAML configuration
import argparse
import json
import logging
import subprocess
import sys
import tempfile
@@ -18,6 +19,8 @@ import yaml
from libnmap.process import NmapProcess
from libnmap.parser import NmapParser
from screenshot_capture import ScreenshotCapture
# Force unbuffered output for Docker
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)
@@ -31,6 +34,7 @@ class SneakyScanner:
self.output_dir = Path(output_dir)
self.output_dir.mkdir(parents=True, exist_ok=True)
self.config = self._load_config()
self.screenshot_capture = None
def _load_config(self) -> Dict[str, Any]:
"""Load and validate YAML configuration"""
@@ -511,6 +515,16 @@ class SneakyScanner:
result = {'protocol': protocol}
# Capture screenshot if screenshot capture is enabled
if self.screenshot_capture:
try:
screenshot_path = self.screenshot_capture.capture(ip, port, protocol)
if screenshot_path:
result['screenshot'] = screenshot_path
except Exception as e:
print(f" Screenshot capture error for {ip}:{port}: {e}",
file=sys.stderr, flush=True)
# If HTTPS, analyze SSL/TLS
if protocol == 'https':
try:
@@ -545,6 +559,14 @@ class SneakyScanner:
# Record start time
start_time = time.time()
scan_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# Initialize screenshot capture
self.screenshot_capture = ScreenshotCapture(
output_dir=str(self.output_dir),
scan_timestamp=scan_timestamp,
timeout=15
)
# Collect all unique IPs
all_ips = set()
@@ -658,6 +680,10 @@ class SneakyScanner:
report['sites'].append(site_result)
# Clean up screenshot capture browser
if self.screenshot_capture:
self.screenshot_capture._close_browser()
return report
def save_report(self, report: Dict[str, Any]) -> Path:
@@ -673,6 +699,13 @@ class SneakyScanner:
def main():
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[logging.StreamHandler(sys.stderr)]
)
parser = argparse.ArgumentParser(
description='SneakyScanner - Masscan-based network scanner'
)

201
src/screenshot_capture.py Normal file
View File

@@ -0,0 +1,201 @@
"""
Screenshot capture module for SneakyScanner.
Uses Playwright with Chromium to capture screenshots of discovered web services.
"""
import os
import logging
from pathlib import Path
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
class ScreenshotCapture:
    """
    Capture viewport screenshots of web services discovered during scanning.

    Uses Playwright with headless Chromium. A single browser process is
    launched lazily on the first capture and reused for every later capture;
    each capture runs in its own short-lived browser context that ignores
    HTTPS certificate errors (so self-signed certificates do not block it).

    The instance can be used as a context manager, in which case the browser
    is launched on entry and closed on exit. Callers that do not use ``with``
    must call ``_close_browser()`` themselves when they are done.
    """

    def __init__(self, output_dir, scan_timestamp, timeout=15, viewport=None):
        """
        Initialize the screenshot capture handler.

        No browser is started and no directory is created here; both happen
        lazily on first use.

        Args:
            output_dir (str): Base output directory for scan reports
            scan_timestamp (str): Timestamp string for this scan (format: YYYYMMDD_HHMMSS)
            timeout (int): Timeout in seconds for page load and screenshot (default: 15)
            viewport (dict): Viewport size dict with 'width' and 'height' keys
                (default: {'width': 1280, 'height': 720})
        """
        self.output_dir = output_dir
        self.scan_timestamp = scan_timestamp
        self.timeout = timeout * 1000  # Playwright expects milliseconds
        self.viewport = viewport or {'width': 1280, 'height': 720}

        # Populated lazily by _launch_browser() / _get_screenshot_dir()
        self.playwright = None
        self.browser = None
        self.screenshot_dir = None

        self.logger = logging.getLogger('SneakyScanner.Screenshot')

    def _get_screenshot_dir(self):
        """
        Create (on first call) and return the screenshots subdirectory for this scan.

        Returns:
            Path: Path object for the screenshots directory
        """
        if self.screenshot_dir is None:
            dir_name = f"scan_report_{self.scan_timestamp}_screenshots"
            self.screenshot_dir = Path(self.output_dir) / dir_name
            self.screenshot_dir.mkdir(parents=True, exist_ok=True)
            self.logger.info("Created screenshot directory: %s", self.screenshot_dir)
        return self.screenshot_dir

    def _generate_filename(self, ip, port):
        """
        Generate a filename for the screenshot.

        Args:
            ip (str): IP address of the service (IPv4 or IPv6 literal)
            port (int): Port number of the service

        Returns:
            str: Filename in format: {ip}_{port}.png, with the '.' and ':'
                separators of the address replaced by underscores
        """
        # Dots (IPv4) and colons (IPv6) are replaced for filesystem compatibility
        safe_ip = ip.replace('.', '_').replace(':', '_')
        return f"{safe_ip}_{port}.png"

    @staticmethod
    def _build_url(ip, port, protocol):
        """
        Build the URL to navigate to for a service.

        Args:
            ip (str): IP address of the service (IPv4 or IPv6 literal)
            port (int): Port number of the service
            protocol (str): URL scheme ('http' or 'https')

        Returns:
            str: URL in format {protocol}://{host}:{port}; an IPv6 literal is
                wrapped in square brackets so the port separator is unambiguous
        """
        host = f"[{ip}]" if ':' in ip and not ip.startswith('[') else ip
        return f"{protocol}://{host}:{port}"

    def _launch_browser(self):
        """
        Launch Playwright and Chromium browser in headless mode.

        Does nothing if a browser is already running. If any step of the
        launch fails, whatever was started is torn down again so a later
        call starts from a clean state.

        Returns:
            bool: True if browser launched successfully, False otherwise
        """
        if self.browser is not None:
            return True  # Already launched

        try:
            self.logger.info("Launching Chromium browser...")
            self.playwright = sync_playwright().start()
            self.browser = self.playwright.chromium.launch(
                headless=True,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-gpu',
                ]
            )
            self.logger.info("Chromium browser launched successfully")
            return True
        except Exception as e:
            self.logger.error("Failed to launch browser: %s", e)
            # The Playwright driver may have started even though the browser
            # did not; stop it so it is not leaked and so the next attempt
            # does not start a second driver on top of it.
            self._close_browser()
            return False

    def _close_browser(self):
        """
        Close the browser and cleanup Playwright resources.

        Safe to call when nothing is running. Errors raised during shutdown
        are logged as warnings and not propagated.
        """
        if self.browser:
            try:
                self.browser.close()
                self.logger.info("Browser closed")
            except Exception as e:
                self.logger.warning("Error closing browser: %s", e)
            finally:
                self.browser = None

        if self.playwright:
            try:
                self.playwright.stop()
            except Exception as e:
                self.logger.warning("Error stopping playwright: %s", e)
            finally:
                self.playwright = None

    def capture(self, ip, port, protocol):
        """
        Capture a screenshot of a web service.

        Args:
            ip (str): IP address of the service
            port (int): Port number of the service
            protocol (str): Protocol to use ('http' or 'https')

        Returns:
            str: Path to the screenshot file relative to the output directory
                ("<screenshots dir name>/<filename>"), or None if the protocol
                is invalid, the browser could not be launched, or the capture
                timed out or failed
        """
        # Validate protocol
        if protocol not in ('http', 'https'):
            self.logger.warning("Invalid protocol '%s' for %s:%s", protocol, ip, port)
            return None

        # Launch browser if not already running
        if not self._launch_browser():
            return None

        url = self._build_url(ip, port, protocol)
        filename = self._generate_filename(ip, port)
        screenshot_dir = self._get_screenshot_dir()
        screenshot_path = screenshot_dir / filename

        context = None
        try:
            self.logger.info("Capturing screenshot: %s", url)

            # Fresh context per capture, with viewport and SSL settings
            context = self.browser.new_context(
                viewport=self.viewport,
                ignore_https_errors=True,  # Handle self-signed certs
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )
            page = context.new_page()
            page.set_default_timeout(self.timeout)

            page.goto(url, wait_until='networkidle', timeout=self.timeout)

            # Take screenshot (viewport only)
            page.screenshot(path=str(screenshot_path), type='png')
            self.logger.info("Screenshot saved: %s", screenshot_path)

            # Return relative path (relative to output directory)
            return f"{screenshot_dir.name}/{filename}"
        except PlaywrightTimeout:
            self.logger.warning("Timeout capturing screenshot for %s", url)
            return None
        except Exception as e:
            self.logger.warning("Failed to capture screenshot for %s: %s", url, e)
            return None
        finally:
            # The browser is shared across captures, so the context must be
            # released on every path (including timeouts); closing the
            # context also closes the page opened in it.
            if context is not None:
                try:
                    context.close()
                except Exception as e:
                    self.logger.warning("Error closing browser context for %s: %s", url, e)

    def __enter__(self):
        """Context manager entry - launch the browser and return this instance."""
        self._launch_browser()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup browser resources."""
        self._close_browser()
        return False  # Don't suppress exceptions