From 212596fa0a7b88edde53924dcc9f1af37b6e0e22 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 14 Nov 2025 02:10:31 +0000
Subject: [PATCH] Add automatic multi-format report generation and ZIP
 archiving
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements automatic generation of JSON, HTML, and ZIP outputs after every scan,
with all files sharing the same timestamp for easy correlation.

Features:
- Automatic HTML report generation after every scan
- ZIP archive creation containing JSON, HTML, and all screenshots
- Unified timestamp across all outputs (JSON, HTML, ZIP, screenshots)
- Graceful error handling (scan continues if HTML/ZIP generation fails)
- Email-ready ZIP archives for easy sharing

Technical changes:
- Fixed timestamp mismatch between scan() and save_report()
- Added generate_outputs() method to SneakyScanner class
- scan() now returns (report, timestamp) tuple
- save_report() accepts timestamp parameter instead of generating new one
- main() updated to call generate_outputs() for all output formats
- Added zipfile import and HTMLReportGenerator import
- Dockerfile updated to copy templates/ directory

Output structure:
- scan_report_YYYYMMDD_HHMMSS.json (JSON report)
- scan_report_YYYYMMDD_HHMMSS.html (HTML report)
- scan_report_YYYYMMDD_HHMMSS.zip (archive with JSON, HTML, screenshots)
- scan_report_YYYYMMDD_HHMMSS_screenshots/ (screenshots directory)

Documentation updated:
- README.md: Updated Output Format, Features, Quick Start sections
- CLAUDE.md: Updated Core Components, Scan Workflow, Key Design Decisions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 CLAUDE.md      |  76 +++++++++++++++++++++++++------------
 Dockerfile     |   1 +
 README.md      |  35 ++++++++++++-----
 src/scanner.py | 100 +++++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 171 insertions(+), 41 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 986fa65..af23404 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -50,8 +50,9 @@ python3 -c "import yaml; yaml.safe_load(open('configs/example-site.yaml'))"
    - `_detect_http_https()`: Detects HTTP vs HTTPS using socket connections
    - `_analyze_ssl_tls()`: Analyzes SSL/TLS certificates and supported versions using sslyze
    - `_run_http_analysis()`: Orchestrates HTTP/HTTPS and SSL/TLS analysis phase
-   - `scan()`: Main workflow - collects IPs, runs scans, performs service detection, HTTP/HTTPS analysis, compiles results
-   - `save_report()`: Writes JSON output with timestamp and scan duration
+   - `scan()`: Main workflow - collects IPs, runs scans, performs service detection, HTTP/HTTPS analysis, compiles results and returns report with timestamp
+   - `save_report()`: Writes JSON output using provided timestamp
+   - `generate_outputs()`: Generates all output formats (JSON, HTML, ZIP) with graceful error handling
 
 2. **src/screenshot_capture.py** - Screenshot capture module
    - `ScreenshotCapture` class: Handles webpage screenshot capture
@@ -75,26 +76,35 @@ python3 -c "import yaml; yaml.safe_load(open('configs/example-site.yaml'))"
    - Define scan title, sites, IPs, and expected network behavior
    - Each IP includes expected ping response and TCP/UDP ports
 
-5. **output/** - JSON scan reports and screenshots
+5. **output/** - Scan outputs (automatically generated)
    - Timestamped JSON files: `scan_report_YYYYMMDD_HHMMSS.json`
+   - Timestamped HTML reports: `scan_report_YYYYMMDD_HHMMSS.html`
+   - Timestamped ZIP archives: `scan_report_YYYYMMDD_HHMMSS.zip`
    - Screenshot directory: `scan_report_YYYYMMDD_HHMMSS_screenshots/`
-   - Contains actual vs. expected comparison for each IP
+   - All outputs share the same timestamp for easy correlation
+   - ZIP contains JSON, HTML, and all screenshots
 
 ### Scan Workflow
 
 1. Parse YAML config and extract all unique IPs
-2. Run ping scan on all IPs using `masscan --ping`
-3. Run TCP scan on all IPs for ports 0-65535
-4. Run UDP scan on all IPs for ports 0-65535
-5. Run service detection on discovered TCP ports using `nmap -sV`
-6. Run HTTP/HTTPS analysis on web services identified by nmap:
+2. Create scan timestamp (shared across all outputs)
+3. Run ping scan on all IPs using `masscan --ping`
+4. Run TCP scan on all IPs for ports 0-65535
+5. Run UDP scan on all IPs for ports 0-65535
+6. Run service detection on discovered TCP ports using `nmap -sV`
+7. Run HTTP/HTTPS analysis on web services identified by nmap:
    - Detect HTTP vs HTTPS using socket connections
    - Capture webpage screenshot using Playwright (viewport 1280x720, 15s timeout)
    - For HTTPS: Extract certificate details (subject, issuer, expiry, SANs)
    - Test TLS version support (TLS 1.0, 1.1, 1.2, 1.3)
    - List accepted cipher suites for each TLS version
-7. Aggregate results by IP and site
-8. Generate JSON report with timestamp, scan duration, screenshot references, and complete service details
+8. Aggregate results by IP and site
+9. Return scan report and timestamp from `scan()` method
+10. Automatically generate all output formats using `generate_outputs()`:
+    - Save JSON report with timestamp
+    - Generate HTML report (graceful error handling - continues if fails)
+    - Create ZIP archive containing JSON, HTML, and screenshots
+    - All outputs use the same timestamp for correlation
 
 ### Why Dockerized
 
@@ -213,12 +223,14 @@ sites:                           # List of sites (required)
 1. **Five-phase scanning**: Masscan for fast port discovery (10,000 pps), nmap for service detection, then HTTP/HTTPS and SSL/TLS analysis for web services
 2. **All-port scanning**: TCP and UDP scans cover entire port range (0-65535) to detect unexpected services
 3. **Selective web analysis**: Only analyze services identified by nmap as web-related to optimize scan time
-4. **Machine-readable output**: JSON format enables automated report generation and comparison
+4. **Multi-format output**: Automatically generates JSON (machine-readable), HTML (human-readable), and ZIP (archival) for every scan
 5. **Expected vs. Actual**: Config includes expected behavior to identify infrastructure drift
 6. **Site grouping**: IPs organized by logical site for better reporting
 7. **Temporary files**: Masscan and nmap output written to temp files to avoid conflicts in parallel scans
 8. **Service details**: Extract product name, version, and additional info for each discovered service
 9. **SSL/TLS security**: Comprehensive certificate analysis and TLS version testing with cipher suite enumeration
+10. **Unified timestamp**: All outputs (JSON, HTML, ZIP, screenshots) share the same timestamp for easy correlation
+11. **Graceful degradation**: If HTML or ZIP generation fails, a warning is printed, the run still completes, and the JSON report is still saved
 
 ## Testing Strategy
 
@@ -226,18 +238,27 @@ When testing changes:
 
 1. Use a controlled test environment with known services (including HTTP/HTTPS)
 2. Create a test config with 1-2 IPs
-3. Verify JSON output structure matches schema
-4. Check that ping, TCP, and UDP results are captured
-5. Verify service detection results include service name, product, and version
-6. For web services, verify http_info includes:
+3. Verify all three outputs are generated automatically:
+   - JSON report (`scan_report_YYYYMMDD_HHMMSS.json`)
+   - HTML report (`scan_report_YYYYMMDD_HHMMSS.html`)
+   - ZIP archive (`scan_report_YYYYMMDD_HHMMSS.zip`)
+4. Verify all outputs share the same timestamp
+5. Check that ping, TCP, and UDP results are captured in JSON
+6. Verify service detection results include service name, product, and version
+7. For web services, verify http_info includes:
    - Correct protocol detection (http vs https)
    - Screenshot path reference (relative to output directory)
    - Verify screenshot PNG file exists at the referenced path
    - Certificate details for HTTPS (subject, issuer, expiry, SANs)
    - TLS version support (1.0-1.3) with cipher suites
-7. Ensure temp files are cleaned up (masscan JSON, nmap XML)
-8. Verify screenshot directory created with correct naming convention
-9. Test screenshot capture with HTTP, HTTPS, and self-signed certificate services
+8. Verify HTML report opens in browser and displays correctly
+9. Verify ZIP archive contains:
+   - JSON report file
+   - HTML report file
+   - Screenshot directory with all PNG files
+10. Ensure temp files are cleaned up (masscan JSON, nmap XML)
+11. Test screenshot capture with HTTP, HTTPS, and self-signed certificate services
+12. Test graceful degradation: If HTML generation fails, JSON and ZIP should still be created
 
 ## Common Tasks
 
@@ -278,9 +299,11 @@ JSON structure defined in src/scanner.py:365+. To modify:
 
 ### Generating HTML Reports
 
+**Note**: HTML reports are automatically generated after every scan. The commands below are for manual generation from existing JSON data only.
+
 **Basic usage:**
 ```bash
-# Generate HTML report from most recent JSON scan
+# Manually generate HTML report from existing JSON scan
 python3 src/report_generator.py output/scan_report_20251113_175235.json
 ```
 
@@ -386,11 +409,16 @@ Optimization strategies:
 
 ## HTML Report Generation (✅ Implemented)
 
-SneakyScanner now includes comprehensive HTML report generation from JSON scan data.
+SneakyScanner automatically generates comprehensive HTML reports after every scan, along with JSON reports and ZIP archives.
 
-**Usage:**
+**Automatic Generation:**
+- HTML reports are created automatically by `generate_outputs()` method after scan completes
+- All outputs (JSON, HTML, ZIP) share the same timestamp for correlation
+- Graceful error handling: If HTML generation fails, the JSON report is still saved and the ZIP archive is still created (without the HTML file)
+
+**Manual Generation (Optional):**
 ```bash
-# Generate HTML report from JSON scan output
+# Manually generate HTML report from existing JSON scan output
 python3 src/report_generator.py output/scan_report_20251113_175235.json
 
 # Specify custom output path
@@ -471,7 +499,7 @@ Generate reports showing changes between scans over time.
 - sslyze==6.0.0 (SSL/TLS analysis)
 - playwright==1.40.0 (webpage screenshot capture)
 - Jinja2==3.1.2 (HTML report template engine)
-- Built-in: socket, ssl, subprocess, xml.etree.ElementTree, logging, json, pathlib, datetime
+- Built-in: socket, ssl, subprocess, xml.etree.ElementTree, logging, json, pathlib, datetime, zipfile
 - System: chromium, chromium-driver (installed via Dockerfile)
 
 ### For Future Enhancements, May Need:
diff --git a/Dockerfile b/Dockerfile
index dc9b420..f0b5661 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,6 +32,7 @@ RUN playwright install chromium
 
 # Copy application code
 COPY src/ ./src/
+COPY templates/ ./templates/
 
 # Create output directory
 RUN mkdir -p /app/output
diff --git a/README.md b/README.md
index b4c24a6..33e9220 100644
--- a/README.md
+++ b/README.md
@@ -40,14 +40,17 @@ A dockerized network scanning tool that uses masscan for fast port discovery, nm
   - Browser reuse for optimal performance
 
 ### Reporting & Output
-- **Machine-readable JSON output** format for easy post-processing
-- **HTML report generation**:
-  - Comprehensive HTML reports with dark theme for easy reading
+- **Automatic multi-format output** after each scan:
+  - Machine-readable JSON reports for post-processing
+  - Human-readable HTML reports with dark theme
+  - ZIP archives containing all outputs for easy sharing
+- **HTML report features**:
+  - Comprehensive reports with dark theme for easy reading
   - Summary dashboard with scan statistics, drift alerts, and security warnings
   - Site-by-site breakdown with expandable service details
   - Visual badges for expected vs. unexpected services
   - SSL/TLS certificate details with expiration warnings
-  - One-click generation from JSON scan data
+  - Automatically generated after every scan
 - **Dockerized** for consistent execution environment and root privilege isolation
 - **Expected vs. Actual comparison** to identify infrastructure drift
 - Timestamped reports with complete scan duration metrics
@@ -82,7 +85,11 @@ docker-compose build
 docker-compose up
 ```
 
-3. Check results in the `output/` directory
+3. Check results in the `output/` directory:
+   - `scan_report_YYYYMMDD_HHMMSS.json` - JSON report
+   - `scan_report_YYYYMMDD_HHMMSS.html` - HTML report
+   - `scan_report_YYYYMMDD_HHMMSS.zip` - ZIP archive
+   - `scan_report_YYYYMMDD_HHMMSS_screenshots/` - Screenshots directory
 
 ## Scan Performance
 
@@ -133,7 +140,13 @@ See `configs/example-site.yaml` for a complete example.
 
 ## Output Format
 
-Scan results are saved as JSON files in the `output/` directory with timestamps. Screenshots are saved in a subdirectory with the same timestamp. The report includes the total scan duration (in seconds) covering all phases: ping scan, TCP/UDP port discovery, service detection, and screenshot capture.
+After each scan completes, SneakyScanner automatically generates three output formats:
+
+1. **JSON Report** (`scan_report_YYYYMMDD_HHMMSS.json`): Machine-readable scan data with all discovered services, ports, and SSL/TLS information
+2. **HTML Report** (`scan_report_YYYYMMDD_HHMMSS.html`): Human-readable report with dark theme, summary dashboard, and detailed service breakdown
+3. **ZIP Archive** (`scan_report_YYYYMMDD_HHMMSS.zip`): Contains JSON report, HTML report, and all screenshots for easy sharing and archival
+
+All files share the same timestamp for easy correlation. Screenshots are saved in a subdirectory (`scan_report_YYYYMMDD_HHMMSS_screenshots/`) and included in the ZIP archive. The report includes the total scan duration (in seconds) covering the scan phases: ping scan, TCP/UDP port discovery, service detection, and screenshot capture. The duration is recorded before the JSON, HTML, and ZIP outputs are written, so output generation time is not included.
 
 ```json
 {
@@ -278,11 +291,15 @@ Screenshots are captured on a best-effort basis:
 
 ## HTML Report Generation
 
-SneakyScanner can generate comprehensive HTML reports from JSON scan data, providing an easy-to-read visual interface for analyzing scan results.
+SneakyScanner automatically generates comprehensive HTML reports after each scan, providing an easy-to-read visual interface for analyzing scan results.
 
-### Generating Reports
+### Automatic Generation
 
-After completing a scan, generate an HTML report from the JSON output:
+HTML reports are automatically created after every scan completes, along with JSON reports and ZIP archives. All three outputs share the same timestamp and are saved to the `output/` directory.
+
+### Manual Generation (Optional)
+
+You can also manually generate HTML reports from existing JSON scan data:
 
 ```bash
 # Generate HTML report (creates report in same directory as JSON)
diff --git a/src/scanner.py b/src/scanner.py
index 8a48b57..0860cf3 100644
--- a/src/scanner.py
+++ b/src/scanner.py
@@ -10,6 +10,7 @@ import subprocess
 import sys
 import tempfile
 import time
+import zipfile
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Any
@@ -20,6 +21,7 @@ from libnmap.process import NmapProcess
 from libnmap.parser import NmapParser
 
 from screenshot_capture import ScreenshotCapture
+from report_generator import HTMLReportGenerator
 
 # Force unbuffered output for Docker
 sys.stdout.reconfigure(line_buffering=True)
@@ -684,12 +686,11 @@ class SneakyScanner:
         if self.screenshot_capture:
             self.screenshot_capture._close_browser()
 
-        return report
+        return report, scan_timestamp
 
-    def save_report(self, report: Dict[str, Any]) -> Path:
-        """Save scan report to JSON file"""
-        timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
-        output_file = self.output_dir / f"scan_report_{timestamp}.json"
+    def save_report(self, report: Dict[str, Any], scan_timestamp: str) -> Path:
+        """Save scan report to JSON file using provided timestamp"""
+        output_file = self.output_dir / f"scan_report_{scan_timestamp}.json"
 
         with open(output_file, 'w') as f:
             json.dump(report, f, indent=2)
@@ -697,6 +698,86 @@ class SneakyScanner:
         print(f"\nReport saved to: {output_file}", flush=True)
         return output_file
 
+    def generate_outputs(self, report: Dict[str, Any], scan_timestamp: str) -> Dict[str, Path]:
+        """
+        Generate all output formats: JSON, HTML report, and ZIP archive
+
+        Args:
+            report: Scan report dictionary
+            scan_timestamp: Timestamp string in format YYYYMMDD_HHMMSS
+
+        Returns:
+            Dictionary of generated file paths. 'json' is always present;
+            'html' and 'zip' are present only when that step succeeded.
+        """
+        output_paths = {}
+
+        # Step 1: Save JSON report (not guarded: a failure here propagates to the caller)
+        print("\n" + "="*60, flush=True)
+        print("Generating outputs...", flush=True)
+        print("="*60, flush=True)
+
+        json_path = self.save_report(report, scan_timestamp)
+        output_paths['json'] = json_path
+
+        # Step 2: Generate HTML report (best effort)
+        html_path = self.output_dir / f"scan_report_{scan_timestamp}.html"
+
+        try:
+            print("\nGenerating HTML report...", flush=True)
+
+            # Auto-detect template directory relative to this script
+            template_dir = Path(__file__).parent.parent / 'templates'
+
+            generator = HTMLReportGenerator(
+                json_report_path=str(json_path),
+                template_dir=str(template_dir)
+            )
+            html_result = generator.generate_report(output_path=str(html_path))
+            output_paths['html'] = Path(html_result)
+
+            print(f"HTML report saved to: {html_path}", flush=True)
+
+        except Exception as e:
+            # 'html' stays out of output_paths; the ZIP step below still runs
+            print(f"Warning: HTML report generation failed: {e}", file=sys.stderr, flush=True)
+            print("Continuing without HTML report...", file=sys.stderr, flush=True)
+
+        # Step 3: Create ZIP archive (best effort)
+        zip_path = self.output_dir / f"scan_report_{scan_timestamp}.zip"
+
+        try:
+            print("\nCreating ZIP archive...", flush=True)
+            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                # Add JSON report
+                zipf.write(json_path, json_path.name)
+
+                # Add HTML report if it was generated
+                if 'html' in output_paths and html_path.exists():
+                    zipf.write(html_path, html_path.name)
+
+                # Add screenshots (top-level files only) under their directory name
+                screenshot_dir = self.output_dir / f"scan_report_{scan_timestamp}_screenshots"
+                if screenshot_dir.is_dir():
+                    for screenshot_file in sorted(screenshot_dir.iterdir()):
+                        if screenshot_file.is_file():
+                            arcname = f"{screenshot_dir.name}/{screenshot_file.name}"
+                            zipf.write(screenshot_file, arcname)
+
+            output_paths['zip'] = zip_path
+            print(f"ZIP archive saved to: {zip_path}", flush=True)
+
+        except Exception as e:
+            print(f"Warning: ZIP archive creation failed: {e}", file=sys.stderr, flush=True)
+            if 'zip' not in output_paths:
+                # Archive was never completed: do not leave a truncated ZIP behind
+                try:
+                    zip_path.unlink()
+                except OSError:
+                    pass  # nothing was written, or the partial file cannot be removed
+
+        return output_paths
+
 
 def main():
     # Configure logging
@@ -723,12 +804,15 @@ def main():
 
     try:
         scanner = SneakyScanner(args.config, args.output_dir)
-        report = scanner.scan()
-        output_file = scanner.save_report(report)
+        report, scan_timestamp = scanner.scan()
+        output_paths = scanner.generate_outputs(report, scan_timestamp)
 
         print("\n" + "="*60, flush=True)
         print("Scan completed successfully!", flush=True)
-        print(f"Results: {output_file}", flush=True)
+        print("="*60, flush=True)
+        print(f"  JSON Report: {output_paths.get('json', 'N/A')}", flush=True)
+        print(f"  HTML Report: {output_paths.get('html', 'N/A')}", flush=True)
+        print(f"  ZIP Archive: {output_paths.get('zip', 'N/A')}", flush=True)
         print("="*60, flush=True)
 
         return 0