Add HTML report generation with dark theme

Implements comprehensive HTML report generation from JSON scan data with Jinja2 templates. Reports feature a dark slate theme with summary dashboard, drift alerts, security warnings, and expandable service details.

Features:
- Dark theme HTML reports with slate/grey color scheme
- Summary dashboard: scan statistics, drift alerts, security warnings
- Site-by-site breakdown with IP grouping and status badges
- Expandable service details and SSL/TLS certificate information
- Visual badges: green (expected), red (unexpected), yellow (missing)
- UDP port handling: shows expected, unexpected, and missing UDP ports
- Screenshot links with relative paths for portability
- Optimized hover effects for table rows
- Standalone HTML output (no external dependencies)

Technical changes:
- Added src/report_generator.py: HTMLReportGenerator class with summary calculations
- Added templates/report_template.html: Jinja2 template for dynamic reports
- Added templates/report_mockup.html: Static mockup for design testing
- Updated requirements.txt: Added Jinja2==3.1.2
- Updated README.md: Added HTML report generation section with usage and features
- Updated CLAUDE.md: Added implementation details, usage guide, and troubleshooting

Usage:
python3 src/report_generator.py output/scan_report.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-14 01:43:59 +00:00
parent 61cc24f8d2
commit d390c4b491
6 changed files with 2933 additions and 58 deletions
--- a/src/report_generator.py
+++ b/src/report_generator.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python3
+"""
+HTML Report Generator for SneakyScanner
+
+Generates comprehensive HTML reports from JSON scan results with:
+- Summary dashboard (statistics, drift alerts, security warnings)
+- Site-by-site breakdown with service details
+- SSL/TLS certificate and cipher suite information
+- Visual badges for expected vs. unexpected services
+"""
+
+import json
+import logging
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, List, Any, Optional
+
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+# Root logging setup: INFO level, "<time> - <level> - <message>" lines
+logging.basicConfig(
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+
+
+class HTMLReportGenerator:
+    """Generates HTML reports from SneakyScanner JSON output."""
+
+    def __init__(self, json_report_path: str, template_dir: str = 'templates'):
+        """
+        Set up paths and the Jinja2 environment; the JSON is not read here.
+
+        Args:
+            json_report_path: Path to the JSON scan report
+            template_dir: Directory containing Jinja2 templates
+        """
+        self.json_report_path = Path(json_report_path)
+        self.template_dir = Path(template_dir)
+        # Filled in by _load_json_report() when generate_report() runs.
+        self.report_data = None
+
+        # Autoescaping is enabled for HTML/XML templates so values taken from
+        # the scan data cannot inject markup into the rendered report.
+        self.jinja_env = Environment(
+            loader=FileSystemLoader(self.template_dir),
+            autoescape=select_autoescape(['html', 'xml'])
+        )
+        self.jinja_env.filters['format_date'] = self._format_date
+        self.jinja_env.filters['format_duration'] = self._format_duration
+
+    def generate_report(self, output_path: Optional[str] = None) -> str:
+        """
+        Generate HTML report from JSON scan data.
+
+        Args:
+            output_path: Path for output HTML file (missing parent directories
+                are created). If None, derives from JSON filename.
+
+        Returns:
+            Path to generated HTML report
+        """
+        logger.info("Loading JSON report from %s", self.json_report_path)
+        self._load_json_report()
+        logger.info("Calculating summary statistics")
+        summary_stats = self._calculate_summary_stats()
+        logger.info("Identifying drift alerts")
+        drift_alerts = self._identify_drift_alerts()
+        logger.info("Identifying security warnings")
+        security_warnings = self._identify_security_warnings()
+
+        context = {
+            'title': self.report_data.get('title', 'SneakyScanner Report'),
+            'scan_time': self.report_data.get('scan_time'),
+            'scan_duration': self.report_data.get('scan_duration'),
+            'config_file': self.report_data.get('config_file'),
+            'sites': self.report_data.get('sites', []),
+            'summary_stats': summary_stats,
+            'drift_alerts': drift_alerts,
+            'security_warnings': security_warnings,
+        }
+
+        # Without an explicit path the report lands next to the JSON file.
+        if output_path is None:
+            target = self.json_report_path.with_suffix('.html')
+        else:
+            target = Path(output_path)
+
+        logger.info("Rendering HTML template")
+        template = self.jinja_env.get_template('report_template.html')
+        html_content = template.render(**context)
+
+        logger.info("Writing HTML report to %s", target)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(html_content, encoding='utf-8')
+        logger.info("Successfully generated HTML report: %s", target)
+        return str(target)
+
+    def _load_json_report(self) -> None:
+        """Load and parse the JSON scan report (read as UTF-8).
+
+        Raises FileNotFoundError when the report file does not exist.
+        """
+        if not self.json_report_path.exists():
+            raise FileNotFoundError(f"JSON report not found: {self.json_report_path}")
+        with open(self.json_report_path, 'r', encoding='utf-8') as f:
+            self.report_data = json.load(f)
+
+    def _calculate_summary_stats(self) -> Dict[str, int]:
+        """
+        Calculate summary statistics for the dashboard.
+
+        Returns:
+            Dictionary mapping each dashboard counter name to its total
+        """
+        stats = {
+            'total_ips': 0,
+            'tcp_ports': 0,
+            'udp_ports': 0,
+            'services': 0,
+            'web_services': 0,
+            'screenshots': 0,
+        }
+
+        for site in self.report_data.get('sites', []):
+            for ip_data in site.get('ips', []):
+                stats['total_ips'] += 1
+                actual = ip_data.get('actual', {})
+                stats['tcp_ports'] += len(actual.get('tcp_ports', []))
+                stats['udp_ports'] += len(actual.get('udp_ports', []))
+
+                services = actual.get('services', [])
+                stats['services'] += len(services)
+
+                # A service counts as "web" when it carries HTTP details.
+                for service in services:
+                    http_info = service.get('http_info')
+                    if http_info:
+                        stats['web_services'] += 1
+                        if http_info.get('screenshot'):
+                            stats['screenshots'] += 1
+
+        return stats
+
+    def _identify_drift_alerts(self) -> Dict[str, int]:
+        """
+        Identify infrastructure drift (unexpected/missing items).
+
+        Returns:
+            Dictionary with drift alert counts
+        """
+        alerts = {
+            'unexpected_tcp': 0,
+            'unexpected_udp': 0,
+            'missing_tcp': 0,
+            'missing_udp': 0,
+            'new_services': 0,
+        }
+
+        for site in self.report_data.get('sites', []):
+            for ip_data in site.get('ips', []):
+                expected = ip_data.get('expected', {})
+                actual = ip_data.get('actual', {})
+                expected_tcp = set(expected.get('tcp_ports', []))
+                actual_tcp = set(actual.get('tcp_ports', []))
+                expected_udp = set(expected.get('udp_ports', []))
+                actual_udp = set(actual.get('udp_ports', []))
+
+                # Ports present in 'actual' but not listed in 'expected'.
+                unexpected_tcp = actual_tcp - expected_tcp
+                unexpected_udp = actual_udp - expected_udp
+                alerts['unexpected_tcp'] += len(unexpected_tcp)
+                alerts['unexpected_udp'] += len(unexpected_udp)
+
+                # Ports listed in 'expected' but absent from 'actual'.
+                alerts['missing_tcp'] += len(expected_tcp - actual_tcp)
+                alerts['missing_udp'] += len(expected_udp - actual_udp)
+                # Services on any unexpected port (TCP/UDP numbers pooled).
+                unexpected_ports = unexpected_tcp | unexpected_udp
+                for service in actual.get('services', []):
+                    if service.get('port') in unexpected_ports:
+                        alerts['new_services'] += 1
+
+        return alerts
+
+    def _identify_security_warnings(self) -> Dict[str, Any]:
+        """
+        Identify security issues (cert expiry, weak TLS, etc.).
+
+        Returns:
+            Dictionary with security warning counts and details
+        """
+        warnings = {
+            'expiring_certs': 0,
+            'weak_tls': 0,
+            'self_signed': 0,
+            'high_ports': 0,
+            'expiring_cert_details': [],  # One dict per expiring certificate
+        }
+
+        for site in self.report_data.get('sites', []):
+            for ip_data in site.get('ips', []):
+                actual = ip_data.get('actual', {})
+
+                for service in actual.get('services', []):
+                    port = service.get('port')
+
+                    # Check for high ports (>10000)
+                    if port and port > 10000:
+                        warnings['high_ports'] += 1
+
+                    # ``or {}``: a key holding JSON null comes back as None.
+                    http_info = service.get('http_info') or {}
+                    ssl_tls = http_info.get('ssl_tls') or {}
+                    if not ssl_tls:
+                        continue
+
+                    # Flag certificates whose days_until_expiry is below 30.
+                    cert = ssl_tls.get('certificate') or {}
+                    days_until_expiry = cert.get('days_until_expiry')
+                    if days_until_expiry is not None and days_until_expiry < 30:
+                        warnings['expiring_certs'] += 1
+                        warnings['expiring_cert_details'].append({
+                            'ip': ip_data.get('address'),
+                            'port': port,
+                            'days': days_until_expiry,
+                            'subject': cert.get('subject'),
+                        })
+
+                    # Identical issuer and subject is treated as self-signed.
+                    issuer = cert.get('issuer', '')
+                    subject = cert.get('subject', '')
+                    if issuer and subject and issuer == subject:
+                        warnings['self_signed'] += 1
+
+                    # TLS 1.0 or 1.1 enabled; counted once per service.
+                    tls_versions = ssl_tls.get('tls_versions') or {}
+                    if any(
+                        (tls_versions.get(version) or {}).get('supported')
+                        for version in ('TLS 1.0', 'TLS 1.1')
+                    ):
+                        warnings['weak_tls'] += 1
+
+        return warnings
+
+    @staticmethod
+    def _format_date(date_str: Optional[str]) -> str:
+        """
+        Format ISO date string for display with a UTC label.
+
+        Args:
+            date_str: ISO format date string
+
+        Returns:
+            Formatted date string ('N/A' if empty, raw text if unparseable)
+        """
+        if not date_str:
+            return 'N/A'
+        try:
+            # fromisoformat() only accepts a trailing 'Z' on Python 3.11+.
+            dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+            # Aware values are converted to UTC; naive ones are assumed to be UTC.
+            if dt.tzinfo is not None:
+                dt = dt.astimezone(timezone.utc)
+            return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
+        except (ValueError, AttributeError, OverflowError):
+            return str(date_str)
+
+    @staticmethod
+    def _format_duration(duration: Optional[float]) -> str:
+        """
+        Format scan duration for display.
+
+        Args:
+            duration: Duration in seconds
+
+        Returns:
+            'N/A' for None, otherwise seconds (below one minute),
+            minutes (below one hour) or hours
+        """
+        if duration is None:
+            return 'N/A'
+        if duration < 60:
+            return f"{duration:.1f} seconds"
+        if duration < 3600:
+            return f"{duration / 60:.1f} minutes"
+        return f"{duration / 3600:.2f} hours"
+
+
+def main():
+    """CLI entry point; exits with status 1 on bad usage or any error."""
+    args = sys.argv[1:]
+    if not args:
+        usage_lines = (
+            "Usage: python report_generator.py <json_report_path> [output_html_path]",
+            "\nExample:",
+            "  python report_generator.py output/scan_report_20251114_103000.json",
+            "  python report_generator.py output/scan_report.json custom_report.html",
+        )
+        print("\n".join(usage_lines))
+        sys.exit(1)
+
+    json_path = args[0]
+    output_path = args[1] if len(args) > 1 else None
+
+    try:
+        # templates/ is resolved against the parent of this script's directory.
+        template_dir = Path(__file__).parent.parent / 'templates'
+        generator = HTMLReportGenerator(json_path, template_dir=str(template_dir))
+        result_path = generator.generate_report(output_path)
+        print("\n✓ Successfully generated HTML report:")
+        print(f"  {result_path}")
+    except FileNotFoundError as e:
+        logger.error("File not found: %s", e)
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        logger.error("Invalid JSON in report file: %s", e)
+        sys.exit(1)
+    except Exception as e:
+        logger.error("Error generating report: %s", e, exc_info=True)
+        sys.exit(1)
+
+
+if __name__ == '__main__':  # run the CLI only when executed as a script
+    main()