# mass-scan2/app/main.py

#!/usr/bin/env python3
import logging
# Configure the root logger at INFO level before the remaining imports run.
# NOTE(review): presumably placed this early so that anything the project modules
# log at import time is already captured — confirm before moving it.
logging.basicConfig(level=logging.INFO)

# TODO:
# LOGGING
# TLS SCANNING
# TLS Version PROBE
# EMAIL

import time
from pathlib import Path
from typing import Dict, List, Set


from utils.scan_config_loader import ScanConfigRepository, ScanConfigFile
from utils.schedule_manager import ScanScheduler
from utils.scanner import nmap_scanner
from utils.models import HostResult

from reporting_jinja import write_html_report_jinja
from utils.settings import get_settings
from utils.common import get_common_utils

# Module-level logger. Named after the module (`__name__`) rather than the file
# path: a path contains dots (".py") that the logging module would interpret as
# logger-hierarchy separators.
logger = logging.getLogger(__name__)

# Shared helper utilities and application settings, fetched once at import time.
utils = get_common_utils()
settings = get_settings()

# Destination of the generated HTML report. The path is relative, so it resolves
# against the process's current working directory.
HTML_REPORT_FILE = Path() / "data" / "report.html"

def results_to_open_sets(
        results: List["HostResult"],
        count_as_open: Set[str] = frozenset({"open", "open|filtered"})) -> Dict[str, Dict[str, Set[int]]]:
    """
    Convert a HostResult list to a per-host map of open ports:
    { ip: {"tcp": {open ports}, "udp": {open ports}} }

    Args:
        results: Host results. Each item is read through its `address` and `ports`
            attributes; each port through `port`, `protocol` and `state`.
        count_as_open: Lower-case port states that count as "open". The port's
            state is lower-cased before the membership test.

    Returns:
        A dict keyed by host address. Every host in `results` gets an entry with
        both a "tcp" and a "udp" set, even when no port qualified.

    Notes:
        - The protocol is compared case-insensitively, and ports whose protocol is
          neither "tcp" nor "udp" (e.g. "sctp") are ignored instead of being
          counted as UDP.
        - If the same address appears in more than one result, the ports are
          merged rather than the later result replacing the earlier one.
    """
    out: Dict[str, Dict[str, Set[int]]] = {}
    for hr in results:
        # Reuse the existing entry so repeated addresses accumulate ports.
        buckets = out.setdefault(hr.address, {"tcp": set(), "udp": set()})
        for p in hr.ports:
            if p.state.lower() not in count_as_open:
                continue
            # Only "tcp"/"udp" have a bucket; any other protocol yields None.
            bucket = buckets.get(str(p.protocol).lower())
            if bucket is not None:
                bucket.add(p.port)
    return out

# Build the "reports" dict (what the HTML renderer expects)
def build_reports(
    scan_config: "ScanConfigFile",
    discovered: Dict[str, Dict[str, Set[int]]],
) -> Dict[str, Dict[str, List[int]]]:
    """
    Compare the ports each host is expected to expose (taken from
    `scan_config.scan_targets`) with the ports that were found open (`discovered`)
    and return the per-IP differences.

    Result shape:
    {
      ip: {
        "unexpected_tcp": [...],   # found open but not expected
        "missing_tcp": [...],      # expected but not found open
        "unexpected_udp": [...],
        "missing_udp": [...]
      }
    }

    Notes:
      - Targets may be objects with `ip` / `expected_tcp` / `expected_udp`
        attributes or dict-like objects with the same keys; targets without an
        IP are skipped.
      - A host with no expected UDP ports simply has an empty expectation here.
        This describes *expectations* only; the scan logic remains free to probe
        e.g. top UDP ports for such hosts.
      - `discovered` uses the keys "tcp" / "udp" per host. Hosts present in only
        one of the two inputs are still reported. IPs are emitted in sorted
        order and every port list is sorted.
    """

    def read_field(target, field, *fallback):
        # Attribute lookup for objects, `.get()` for dict-like targets.
        if hasattr(target, field):
            return getattr(target, field, None)
        return target.get(field, *fallback)

    def as_set(ports):
        # Callers may hand over lists; normalise so set arithmetic works.
        return ports if isinstance(ports, set) else set(ports)

    # Expected ports per IP, straight from the configuration.
    wanted: Dict[str, Dict[str, Set[int]]] = {}
    for target in getattr(scan_config, "scan_targets", []) or []:
        ip = read_field(target, "ip")
        if not ip:
            continue

        tcp_raw = read_field(target, "expected_tcp", [])
        udp_raw = read_field(target, "expected_udp", [])

        wanted[ip] = {
            "expected_tcp": {int(port) for port in tcp_raw or []},
            "expected_udp": {int(port) for port in udp_raw or []},
        }

    # Walk every IP known from either side and record the differences.
    reports: Dict[str, Dict[str, List[int]]] = {}
    for ip in sorted(set(wanted.keys()).union(discovered.keys())):
        host_wanted = wanted.get(ip, {})
        host_found = discovered.get(ip, {})

        want_tcp = host_wanted.get("expected_tcp", set())
        want_udp = host_wanted.get("expected_udp", set())

        open_tcp = as_set(host_found.get("tcp", set()) or set())
        open_udp = as_set(host_found.get("udp", set()) or set())

        reports[ip] = {
            "unexpected_tcp": sorted(open_tcp - want_tcp),
            "missing_tcp": sorted(want_tcp - open_tcp),
            "unexpected_udp": sorted(open_udp - want_udp),
            "missing_udp": sorted(want_udp - open_udp),
        }

    return reports

def run_repo_scan(scan_config:ScanConfigFile):
    """
    Run one scan for `scan_config` and write the HTML compliance report.

    Steps: log the scan options, run the nmap scanner over the configured
    targets, reduce the results to open-port sets, diff them against the
    expected ports, and render the report to `HTML_REPORT_FILE` (issues only).

    Args:
        scan_config: The scan configuration to execute. Its `name`,
            `scan_options` and `scan_targets` are read here.

    Any exception raised while scanning or reporting propagates to the caller;
    `scanner.cleanup()` is still invoked in that case.
    """
    logger.info("Starting scan for %s", scan_config.name)
    logger.info("Options: udp=%s tls_sec=%s tls_exp=%s",
                scan_config.scan_options.udp_scan,
                scan_config.scan_options.tls_security_scan,
                scan_config.scan_options.tls_exp_check)
    logger.info("Targets: %d hosts", len(scan_config.scan_targets))

    scanner = nmap_scanner(scan_config)
    try:
        scan_results = scanner.scan_targets()
        discovered_sets = results_to_open_sets(scan_results, count_as_open={"open", "open|filtered"})
        reports = build_reports(scan_config, discovered_sets)
        write_html_report_jinja(
            reports=reports,
            host_results=scan_results,
            out_path=HTML_REPORT_FILE,
            title="Compliance Report",
            only_issues=True,
        )
    finally:
        # Always release whatever the scanner holds, even when the scan or the
        # report rendering fails part-way through.
        scanner.cleanup()

def main():
    """
    Application entry point.

    Logs the application banner, validates the configured timezone, loads all
    scan configurations and then either:
      - production: starts the scheduler, schedules every configuration with
        `run_repo_scan` as the job function, and blocks until interrupted
        (Ctrl-C shuts the scheduler down), or
      - development: runs a single scan using the first loaded configuration.

    Raises:
        SystemExit: In development mode when no scan configuration was loaded
            (exit status 1), instead of failing with a bare IndexError.
    """
    logger.info("%s - v%s.%s Started",
                settings.app.name,
                settings.app.version_major,
                settings.app.version_minor)
    logger.info("Application Running Production flag set to: %s", settings.app.production)

    # timezone validation
    if utils.TextUtils.is_valid_timezone(settings.app.timezone):
        logger.info("Timezone set to %s", settings.app.timezone)
        app_timezone = settings.app.timezone
    else:
        logger.warning("The Timezone %s is invalid, Defaulting to UTC", settings.app.timezone)
        # Stand-in for UTC: this IANA zone sits at UTC+0 with no DST.
        app_timezone = "America/Danmarkshavn"

    # load / configure the scan repos
    repo = ScanConfigRepository()
    scan_configs = repo.load_all()

    # if in prod - run the scheduler like normal
    if settings.app.production:
        sched = ScanScheduler(timezone=app_timezone)
        sched.start()

        jobs = sched.schedule_configs(scan_configs, run_scan_fn=run_repo_scan)
        logger.info("Scheduled %d job(s).", jobs)

        try:
            # Keep the main thread alive; the loop only ends on Ctrl-C.
            while True:
                time.sleep(3600)
        except KeyboardInterrupt:
            sched.shutdown()
    else:
        # run single scan in dev mode
        if not scan_configs:
            logger.error("No scan configurations were loaded; nothing to scan.")
            raise SystemExit(1)
        run_repo_scan(scan_configs[0])

# Run the application only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()