From 212596fa0a7b88edde53924dcc9f1af37b6e0e22 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 14 Nov 2025 02:10:31 +0000 Subject: [PATCH] Add automatic multi-format report generation and ZIP archiving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements automatic generation of JSON, HTML, and ZIP outputs after every scan, with all files sharing the same timestamp for easy correlation. Features: - Automatic HTML report generation after every scan - ZIP archive creation containing JSON, HTML, and all screenshots - Unified timestamp across all outputs (JSON, HTML, ZIP, screenshots) - Graceful error handling (scan continues if HTML/ZIP generation fails) - Email-ready ZIP archives for easy sharing Technical changes: - Fixed timestamp mismatch between scan() and save_report() - Added generate_outputs() method to SneakyScanner class - scan() now returns (report, timestamp) tuple - save_report() accepts timestamp parameter instead of generating new one - main() updated to call generate_outputs() for all output formats - Added zipfile import and HTMLReportGenerator import - Dockerfile updated to copy templates/ directory Output structure: - scan_report_YYYYMMDD_HHMMSS.json (JSON report) - scan_report_YYYYMMDD_HHMMSS.html (HTML report) - scan_report_YYYYMMDD_HHMMSS.zip (archive with JSON, HTML, screenshots) - scan_report_YYYYMMDD_HHMMSS_screenshots/ (screenshots directory) Documentation updated: - README.md: Updated Output Format, Features, Quick Start sections - CLAUDE.md: Updated Core Components, Scan Workflow, Key Design Decisions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CLAUDE.md | 76 +++++++++++++++++++++++++------------ Dockerfile | 1 + README.md | 35 ++++++++++++----- src/scanner.py | 100 +++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 171 insertions(+), 41 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 986fa65..af23404 100644 --- 
a/CLAUDE.md +++ b/CLAUDE.md @@ -50,8 +50,9 @@ python3 -c "import yaml; yaml.safe_load(open('configs/example-site.yaml'))" - `_detect_http_https()`: Detects HTTP vs HTTPS using socket connections - `_analyze_ssl_tls()`: Analyzes SSL/TLS certificates and supported versions using sslyze - `_run_http_analysis()`: Orchestrates HTTP/HTTPS and SSL/TLS analysis phase - - `scan()`: Main workflow - collects IPs, runs scans, performs service detection, HTTP/HTTPS analysis, compiles results - - `save_report()`: Writes JSON output with timestamp and scan duration + - `scan()`: Main workflow - collects IPs, runs scans, performs service detection, HTTP/HTTPS analysis, compiles results and returns report with timestamp + - `save_report()`: Writes JSON output using provided timestamp + - `generate_outputs()`: Generates all output formats (JSON, HTML, ZIP) with graceful error handling 2. **src/screenshot_capture.py** - Screenshot capture module - `ScreenshotCapture` class: Handles webpage screenshot capture @@ -75,26 +76,35 @@ python3 -c "import yaml; yaml.safe_load(open('configs/example-site.yaml'))" - Define scan title, sites, IPs, and expected network behavior - Each IP includes expected ping response and TCP/UDP ports -5. **output/** - JSON scan reports and screenshots +5. **output/** - Scan outputs (automatically generated) - Timestamped JSON files: `scan_report_YYYYMMDD_HHMMSS.json` + - Timestamped HTML reports: `scan_report_YYYYMMDD_HHMMSS.html` + - Timestamped ZIP archives: `scan_report_YYYYMMDD_HHMMSS.zip` - Screenshot directory: `scan_report_YYYYMMDD_HHMMSS_screenshots/` - - Contains actual vs. expected comparison for each IP + - All outputs share the same timestamp for easy correlation + - ZIP contains JSON, HTML, and all screenshots ### Scan Workflow 1. Parse YAML config and extract all unique IPs -2. Run ping scan on all IPs using `masscan --ping` -3. Run TCP scan on all IPs for ports 0-65535 -4. Run UDP scan on all IPs for ports 0-65535 -5. 
Run service detection on discovered TCP ports using `nmap -sV` -6. Run HTTP/HTTPS analysis on web services identified by nmap: +2. Create scan timestamp (shared across all outputs) +3. Run ping scan on all IPs using `masscan --ping` +4. Run TCP scan on all IPs for ports 0-65535 +5. Run UDP scan on all IPs for ports 0-65535 +6. Run service detection on discovered TCP ports using `nmap -sV` +7. Run HTTP/HTTPS analysis on web services identified by nmap: - Detect HTTP vs HTTPS using socket connections - Capture webpage screenshot using Playwright (viewport 1280x720, 15s timeout) - For HTTPS: Extract certificate details (subject, issuer, expiry, SANs) - Test TLS version support (TLS 1.0, 1.1, 1.2, 1.3) - List accepted cipher suites for each TLS version -7. Aggregate results by IP and site -8. Generate JSON report with timestamp, scan duration, screenshot references, and complete service details +8. Aggregate results by IP and site +9. Return scan report and timestamp from `scan()` method +10. Automatically generate all output formats using `generate_outputs()`: + - Save JSON report with timestamp + - Generate HTML report (graceful error handling - continues if fails) + - Create ZIP archive containing JSON, HTML, and screenshots + - All outputs use the same timestamp for correlation ### Why Dockerized @@ -213,12 +223,14 @@ sites: # List of sites (required) 1. **Five-phase scanning**: Masscan for fast port discovery (10,000 pps), nmap for service detection, then HTTP/HTTPS and SSL/TLS analysis for web services 2. **All-port scanning**: TCP and UDP scans cover entire port range (0-65535) to detect unexpected services 3. **Selective web analysis**: Only analyze services identified by nmap as web-related to optimize scan time -4. **Machine-readable output**: JSON format enables automated report generation and comparison +4. **Multi-format output**: Automatically generates JSON (machine-readable), HTML (human-readable), and ZIP (archival) for every scan 5. **Expected vs. 
Actual**: Config includes expected behavior to identify infrastructure drift 6. **Site grouping**: IPs organized by logical site for better reporting 7. **Temporary files**: Masscan and nmap output written to temp files to avoid conflicts in parallel scans 8. **Service details**: Extract product name, version, and additional info for each discovered service 9. **SSL/TLS security**: Comprehensive certificate analysis and TLS version testing with cipher suite enumeration +10. **Unified timestamp**: All outputs (JSON, HTML, ZIP, screenshots) share the same timestamp for easy correlation +11. **Graceful degradation**: If HTML or ZIP generation fails, scan continues and JSON is still saved ## Testing Strategy @@ -226,18 +238,27 @@ When testing changes: 1. Use a controlled test environment with known services (including HTTP/HTTPS) 2. Create a test config with 1-2 IPs -3. Verify JSON output structure matches schema -4. Check that ping, TCP, and UDP results are captured -5. Verify service detection results include service name, product, and version -6. For web services, verify http_info includes: +3. Verify all three outputs are generated automatically: + - JSON report (`scan_report_YYYYMMDD_HHMMSS.json`) + - HTML report (`scan_report_YYYYMMDD_HHMMSS.html`) + - ZIP archive (`scan_report_YYYYMMDD_HHMMSS.zip`) +4. Verify all outputs share the same timestamp +5. Check that ping, TCP, and UDP results are captured in JSON +6. Verify service detection results include service name, product, and version +7. For web services, verify http_info includes: - Correct protocol detection (http vs https) - Screenshot path reference (relative to output directory) - Verify screenshot PNG file exists at the referenced path - Certificate details for HTTPS (subject, issuer, expiry, SANs) - TLS version support (1.0-1.3) with cipher suites -7. Ensure temp files are cleaned up (masscan JSON, nmap XML) -8. Verify screenshot directory created with correct naming convention -9. 
Test screenshot capture with HTTP, HTTPS, and self-signed certificate services +8. Verify HTML report opens in browser and displays correctly +9. Verify ZIP archive contains: + - JSON report file + - HTML report file + - Screenshot directory with all PNG files +10. Ensure temp files are cleaned up (masscan JSON, nmap XML) +11. Test screenshot capture with HTTP, HTTPS, and self-signed certificate services +12. Test graceful degradation: If HTML generation fails, JSON and ZIP should still be created ## Common Tasks @@ -278,9 +299,11 @@ JSON structure defined in src/scanner.py:365+. To modify: ### Generating HTML Reports +**Note**: HTML reports are automatically generated after every scan. The commands below are for manual generation from existing JSON data only. + **Basic usage:** ```bash -# Generate HTML report from most recent JSON scan +# Manually generate HTML report from existing JSON scan python3 src/report_generator.py output/scan_report_20251113_175235.json ``` @@ -386,11 +409,16 @@ Optimization strategies: ## HTML Report Generation (✅ Implemented) -SneakyScanner now includes comprehensive HTML report generation from JSON scan data. +SneakyScanner automatically generates comprehensive HTML reports after every scan, along with JSON reports and ZIP archives. -**Usage:** +**Automatic Generation:** +- HTML reports are created automatically by `generate_outputs()` method after scan completes +- All outputs (JSON, HTML, ZIP) share the same timestamp for correlation +- Graceful error handling: If HTML generation fails, scan continues with JSON output + +**Manual Generation (Optional):** ```bash -# Generate HTML report from JSON scan output +# Manually generate HTML report from existing JSON scan output python3 src/report_generator.py output/scan_report_20251113_175235.json # Specify custom output path @@ -471,7 +499,7 @@ Generate reports showing changes between scans over time. 
- sslyze==6.0.0 (SSL/TLS analysis) - playwright==1.40.0 (webpage screenshot capture) - Jinja2==3.1.2 (HTML report template engine) -- Built-in: socket, ssl, subprocess, xml.etree.ElementTree, logging, json, pathlib, datetime +- Built-in: socket, ssl, subprocess, xml.etree.ElementTree, logging, json, pathlib, datetime, zipfile - System: chromium, chromium-driver (installed via Dockerfile) ### For Future Enhancements, May Need: diff --git a/Dockerfile b/Dockerfile index dc9b420..f0b5661 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,6 +32,7 @@ RUN playwright install chromium # Copy application code COPY src/ ./src/ +COPY templates/ ./templates/ # Create output directory RUN mkdir -p /app/output diff --git a/README.md b/README.md index b4c24a6..33e9220 100644 --- a/README.md +++ b/README.md @@ -40,14 +40,17 @@ A dockerized network scanning tool that uses masscan for fast port discovery, nm - Browser reuse for optimal performance ### Reporting & Output -- **Machine-readable JSON output** format for easy post-processing -- **HTML report generation**: - - Comprehensive HTML reports with dark theme for easy reading +- **Automatic multi-format output** after each scan: + - Machine-readable JSON reports for post-processing + - Human-readable HTML reports with dark theme + - ZIP archives containing all outputs for easy sharing +- **HTML report features**: + - Comprehensive reports with dark theme for easy reading - Summary dashboard with scan statistics, drift alerts, and security warnings - Site-by-site breakdown with expandable service details - Visual badges for expected vs. unexpected services - SSL/TLS certificate details with expiration warnings - - One-click generation from JSON scan data + - Automatically generated after every scan - **Dockerized** for consistent execution environment and root privilege isolation - **Expected vs. 
Actual comparison** to identify infrastructure drift - Timestamped reports with complete scan duration metrics @@ -82,7 +85,11 @@ docker-compose build docker-compose up ``` -3. Check results in the `output/` directory +3. Check results in the `output/` directory: + - `scan_report_YYYYMMDD_HHMMSS.json` - JSON report + - `scan_report_YYYYMMDD_HHMMSS.html` - HTML report + - `scan_report_YYYYMMDD_HHMMSS.zip` - ZIP archive + - `scan_report_YYYYMMDD_HHMMSS_screenshots/` - Screenshots directory ## Scan Performance @@ -133,7 +140,13 @@ See `configs/example-site.yaml` for a complete example. ## Output Format -Scan results are saved as JSON files in the `output/` directory with timestamps. Screenshots are saved in a subdirectory with the same timestamp. The report includes the total scan duration (in seconds) covering all phases: ping scan, TCP/UDP port discovery, service detection, and screenshot capture. +After each scan completes, SneakyScanner automatically generates three output formats: + +1. **JSON Report** (`scan_report_YYYYMMDD_HHMMSS.json`): Machine-readable scan data with all discovered services, ports, and SSL/TLS information +2. **HTML Report** (`scan_report_YYYYMMDD_HHMMSS.html`): Human-readable report with dark theme, summary dashboard, and detailed service breakdown +3. **ZIP Archive** (`scan_report_YYYYMMDD_HHMMSS.zip`): Contains JSON report, HTML report, and all screenshots for easy sharing and archival + +All files share the same timestamp for easy correlation. Screenshots are saved in a subdirectory (`scan_report_YYYYMMDD_HHMMSS_screenshots/`) and included in the ZIP archive. The report includes the total scan duration (in seconds) covering all scan phases: ping scan, TCP/UDP port discovery, service detection, and screenshot capture. The JSON, HTML, and ZIP output files are written after the duration has been recorded, so the time spent generating them is not included. 
```json { @@ -278,11 +291,15 @@ Screenshots are captured on a best-effort basis: ## HTML Report Generation -SneakyScanner can generate comprehensive HTML reports from JSON scan data, providing an easy-to-read visual interface for analyzing scan results. +SneakyScanner automatically generates comprehensive HTML reports after each scan, providing an easy-to-read visual interface for analyzing scan results. -### Generating Reports +### Automatic Generation -After completing a scan, generate an HTML report from the JSON output: +HTML reports are automatically created after every scan completes, along with JSON reports and ZIP archives. All three outputs share the same timestamp and are saved to the `output/` directory. + +### Manual Generation (Optional) + +You can also manually generate HTML reports from existing JSON scan data: ```bash # Generate HTML report (creates report in same directory as JSON) diff --git a/src/scanner.py b/src/scanner.py index 8a48b57..0860cf3 100644 --- a/src/scanner.py +++ b/src/scanner.py @@ -10,6 +10,7 @@ import subprocess import sys import tempfile import time +import zipfile from datetime import datetime from pathlib import Path from typing import Dict, List, Any @@ -20,6 +21,7 @@ from libnmap.process import NmapProcess from libnmap.parser import NmapParser from screenshot_capture import ScreenshotCapture +from report_generator import HTMLReportGenerator # Force unbuffered output for Docker sys.stdout.reconfigure(line_buffering=True) @@ -684,12 +686,11 @@ class SneakyScanner: if self.screenshot_capture: self.screenshot_capture._close_browser() - return report + return report, scan_timestamp - def save_report(self, report: Dict[str, Any]) -> Path: - """Save scan report to JSON file""" - timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') - output_file = self.output_dir / f"scan_report_{timestamp}.json" + def save_report(self, report: Dict[str, Any], scan_timestamp: str) -> Path: + """Save scan report to JSON file using provided 
timestamp""" + output_file = self.output_dir / f"scan_report_{scan_timestamp}.json" with open(output_file, 'w') as f: json.dump(report, f, indent=2) @@ -697,6 +698,86 @@ class SneakyScanner: print(f"\nReport saved to: {output_file}", flush=True) return output_file + def generate_outputs(self, report: Dict[str, Any], scan_timestamp: str) -> Dict[str, Path]: + """ + Generate all output formats: JSON, HTML report, and ZIP archive + + Args: + report: Scan report dictionary + scan_timestamp: Timestamp string in format YYYYMMDD_HHMMSS + + Returns: + Dictionary with paths to generated files: {'json': Path, 'html': Path, 'zip': Path} + """ + output_paths = {} + + # Step 1: Save JSON report + print("\n" + "="*60, flush=True) + print("Generating outputs...", flush=True) + print("="*60, flush=True) + + json_path = self.save_report(report, scan_timestamp) + output_paths['json'] = json_path + + # Step 2: Generate HTML report + html_path = self.output_dir / f"scan_report_{scan_timestamp}.html" + + try: + print(f"\nGenerating HTML report...", flush=True) + + # Auto-detect template directory relative to this script + template_dir = Path(__file__).parent.parent / 'templates' + + # Create HTML report generator + generator = HTMLReportGenerator( + json_report_path=str(json_path), + template_dir=str(template_dir) + ) + + # Generate report + html_result = generator.generate_report(output_path=str(html_path)) + output_paths['html'] = Path(html_result) + + print(f"HTML report saved to: {html_path}", flush=True) + + except Exception as e: + print(f"Warning: HTML report generation failed: {e}", file=sys.stderr, flush=True) + print(f"Continuing with JSON output only...", file=sys.stderr, flush=True) + # Don't add html_path to output_paths if it failed + + # Step 3: Create ZIP archive + zip_path = self.output_dir / f"scan_report_{scan_timestamp}.zip" + + try: + print(f"\nCreating ZIP archive...", flush=True) + + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + # Add JSON 
report + zipf.write(json_path, json_path.name) + + # Add HTML report if it was generated + if 'html' in output_paths and html_path.exists(): + zipf.write(html_path, html_path.name) + + # Add screenshots directory if it exists + screenshot_dir = self.output_dir / f"scan_report_{scan_timestamp}_screenshots" + if screenshot_dir.exists() and screenshot_dir.is_dir(): + # Add all files in screenshot directory + for screenshot_file in screenshot_dir.iterdir(): + if screenshot_file.is_file(): + # Preserve directory structure in ZIP + arcname = f"{screenshot_dir.name}/{screenshot_file.name}" + zipf.write(screenshot_file, arcname) + + output_paths['zip'] = zip_path + print(f"ZIP archive saved to: {zip_path}", flush=True) + + except Exception as e: + print(f"Warning: ZIP archive creation failed: {e}", file=sys.stderr, flush=True) + # Don't add zip_path to output_paths if it failed + + return output_paths + def main(): # Configure logging @@ -723,12 +804,15 @@ def main(): try: scanner = SneakyScanner(args.config, args.output_dir) - report = scanner.scan() - output_file = scanner.save_report(report) + report, scan_timestamp = scanner.scan() + output_paths = scanner.generate_outputs(report, scan_timestamp) print("\n" + "="*60, flush=True) print("Scan completed successfully!", flush=True) - print(f"Results: {output_file}", flush=True) + print("="*60, flush=True) + print(f" JSON Report: {output_paths.get('json', 'N/A')}", flush=True) + print(f" HTML Report: {output_paths.get('html', 'N/A')}", flush=True) + print(f" ZIP Archive: {output_paths.get('zip', 'N/A')}", flush=True) print("="*60, flush=True) return 0