SneakyScan/web/jobs/scan_job.py
Phillip Tarrant ee0c5a2c3c Phase 2 Step 3: Implement Background Job Queue
Implemented APScheduler integration for background scan execution,
enabling async job processing without blocking HTTP requests.

## Changes

### Background Jobs (web/jobs/)
- scan_job.py - Execute scans in background threads
  - execute_scan() with isolated database sessions
  - Comprehensive error handling and logging
  - Scan status lifecycle tracking
  - Timing and error message storage

### Scheduler Service (web/services/scheduler_service.py)
- SchedulerService class for job management
- APScheduler BackgroundScheduler integration
- ThreadPoolExecutor for concurrent jobs (max 3 workers)
- queue_scan() - Immediate job execution
- Job monitoring: list_jobs(), get_job_status()
- Graceful shutdown handling
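
A minimal sketch of the service, assuming the method names above and standard APScheduler 3.x executor wiring (the constructor signature is illustrative, not taken from the diff):

```python
from apscheduler.executors.pool import ThreadPoolExecutor
from apscheduler.schedulers.background import BackgroundScheduler

from web.jobs.scan_job import execute_scan


class SchedulerService:
    """Thin wrapper around APScheduler for queuing background scans."""

    def __init__(self, db_url: str, max_workers: int = 3):
        self.db_url = db_url
        # Cap concurrent scans by sizing the default executor's thread pool.
        self.scheduler = BackgroundScheduler(
            executors={'default': ThreadPoolExecutor(max_workers)}
        )
        self.scheduler.start()

    def queue_scan(self, scan_id: int, config_file: str) -> str:
        """Queue a scan for immediate execution and return its job ID."""
        job_id = f"scan_{scan_id}"
        # A 'date' trigger with no run_date fires as soon as a worker is free.
        self.scheduler.add_job(
            execute_scan,
            trigger='date',
            args=[scan_id, config_file, self.db_url],
            id=job_id,
            replace_existing=True,
        )
        return job_id

    def list_jobs(self):
        return self.scheduler.get_jobs()

    def get_job_status(self, job_id: str):
        job = self.scheduler.get_job(job_id)
        return {'id': job.id, 'next_run_time': job.next_run_time} if job else None

    def shutdown(self):
        # wait=True lets running jobs finish before the scheduler stops.
        self.scheduler.shutdown(wait=True)
```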

### Flask Integration (web/app.py)
- init_scheduler() function
- Scheduler initialization in app factory
- Scheduler stored on the Flask app object (app.scheduler)
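
In outline (the config key and factory shape are assumptions; `SchedulerService` is the sketch above):

```python
import atexit

from flask import Flask

from web.services.scheduler_service import SchedulerService


def init_scheduler(app: Flask) -> None:
    """Create the scheduler service and attach it to the Flask app."""
    scheduler = SchedulerService(db_url=app.config['SQLALCHEMY_DATABASE_URI'])
    app.scheduler = scheduler
    # Let running jobs drain before the interpreter exits.
    atexit.register(scheduler.shutdown)


def create_app() -> Flask:
    app = Flask(__name__)
    # ... config, database, blueprint registration ...
    init_scheduler(app)
    return app
```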

### Database Schema (migration 003)
- Added scan timing fields:
  - started_at - Scan execution start time
  - completed_at - Scan execution completion time
  - error_message - Error details for failed scans
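
Sketched as an Alembic-style migration; the `scans` table name and column nullability are assumptions:

```python
import sqlalchemy as sa
from alembic import op


def upgrade():
    # All three columns are nullable: they are only populated as a scan runs.
    op.add_column('scans', sa.Column('started_at', sa.DateTime(), nullable=True))
    op.add_column('scans', sa.Column('completed_at', sa.DateTime(), nullable=True))
    op.add_column('scans', sa.Column('error_message', sa.Text(), nullable=True))


def downgrade():
    op.drop_column('scans', 'error_message')
    op.drop_column('scans', 'completed_at')
    op.drop_column('scans', 'started_at')
```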

### Service Layer Updates (web/services/scan_service.py)
- trigger_scan() accepts scheduler parameter
- Queues background jobs after creating scan record
- get_scan_status() includes new timing and error fields
- _save_scan_to_db() sets completed_at timestamp
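
The queuing path looks roughly like this (the exact `trigger_scan()` signature and `Scan` constructor fields are assumptions):

```python
from datetime import datetime

from web.models import Scan


class ScanService:
    def __init__(self, session):
        self.session = session

    def trigger_scan(self, config_file: str, scheduler=None) -> dict:
        """Create the scan record, then hand execution to the scheduler."""
        scan = Scan(status='created', timestamp=datetime.utcnow())
        self.session.add(scan)
        self.session.commit()  # commit first so the background job can find the row

        job_id = None
        if scheduler is not None:
            job_id = scheduler.queue_scan(scan.id, config_file)

        return {'scan_id': scan.id, 'status': scan.status, 'job_id': job_id}
```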

### API Updates (web/api/scans.py)
- POST /api/scans passes scheduler to trigger_scan()
- Scans now execute in the background automatically
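
Roughly (the blueprint name, request schema, and `get_session()` helper are hypothetical):

```python
from flask import Blueprint, current_app, jsonify, request

from web.services.scan_service import ScanService
from web.database import get_session  # hypothetical session helper

scans_bp = Blueprint('scans', __name__, url_prefix='/api/scans')


@scans_bp.route('', methods=['POST'])
def create_scan():
    data = request.get_json()
    service = ScanService(get_session())
    # Hand the app-level scheduler to the service so the scan runs async.
    result = service.trigger_scan(data['config_file'], scheduler=current_app.scheduler)
    # 202 Accepted: the request is queued; work continues in the background.
    return jsonify(result), 202
```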

### Model Updates (web/models.py)
- Added started_at, completed_at, error_message to Scan model
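
Equivalent ORM columns (table name assumed, existing columns abridged):

```python
from sqlalchemy import Column, DateTime, Integer, String, Text
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Scan(Base):
    __tablename__ = 'scans'  # table name assumed

    id = Column(Integer, primary_key=True)
    status = Column(String(32))

    # Added in migration 003:
    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)
    error_message = Column(Text, nullable=True)
```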

### Testing (tests/test_background_jobs.py)
- 13 unit tests for background job execution
- Scheduler initialization and configuration tests
- Job queuing and status tracking tests
- Scan timing field tests
- Error handling and storage tests
- Integration test for full workflow (skipped by default)
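
As an illustration of the error-handling tests (names and fixtures are invented, and it assumes `SneakyScanner` raises `FileNotFoundError` for a missing config and that `web.models` exposes `Base`):

```python
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from web.models import Base, Scan
from web.jobs.scan_job import execute_scan


@pytest.fixture
def db_url(tmp_path):
    # File-backed SQLite so the job's separate engine sees the same data.
    url = f"sqlite:///{tmp_path / 'test.db'}"
    Base.metadata.create_all(create_engine(url))
    return url


def test_missing_config_marks_scan_failed(db_url):
    session = sessionmaker(bind=create_engine(db_url))()
    scan = Scan(status='created')
    session.add(scan)
    session.commit()

    # Run the job synchronously with a config path that does not exist.
    execute_scan(scan.id, '/nonexistent/config.yaml', db_url)

    session.refresh(scan)
    assert scan.status == 'failed'
    assert 'not found' in scan.error_message.lower()
```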

## Features

- Async scan execution without blocking HTTP requests
- Concurrent scan support (configurable max workers)
- Isolated database sessions per background thread
- Scan lifecycle tracking: created → running → completed/failed
- Error messages captured and stored in database
- Job monitoring and management capabilities
- Graceful shutdown waits for running jobs

## Implementation Notes

- Scanner runs in subprocess from background thread
- Docker provides necessary privileges (--privileged, --network host)
- Each job gets an isolated SQLAlchemy session (avoids cross-thread locking conflicts)
- Job IDs follow pattern: scan_{scan_id}
- Background jobs survive across requests
- Failed jobs store error messages in database
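
Putting this together, a queued scan can be monitored from outside the Flask app with the module's `get_scan_status_from_db()` helper (the database path and scan ID below are hypothetical):

```python
import time

from web.jobs.scan_job import get_scan_status_from_db

DB_URL = 'sqlite:///sneakyscan.db'  # hypothetical database path
SCAN_ID = 42                        # hypothetical scan queued via POST /api/scans

# Poll until the background job reaches a terminal state.
while True:
    status = get_scan_status_from_db(SCAN_ID, DB_URL)
    if status is None or status['status'] in ('completed', 'failed'):
        break
    time.sleep(2)

print(status)
```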

## Documentation (docs/ai/PHASE2.md)
- Updated progress: 6/14 days complete (43%)
- Marked Step 3 as complete
- Added detailed implementation notes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-14 09:24:00 -06:00


"""
Background scan job execution.
This module handles the execution of scans in background threads,
updating database status and handling errors.
"""
import logging
import traceback
from datetime import datetime
from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from src.scanner import SneakyScanner
from web.models import Scan
from web.services.scan_service import ScanService
logger = logging.getLogger(__name__)
def execute_scan(scan_id: int, config_file: str, db_url: str):
"""
Execute a scan in the background.
This function is designed to run in a background thread via APScheduler.
It creates its own database session to avoid conflicts with the main
application thread.
Args:
scan_id: ID of the scan record in database
config_file: Path to YAML configuration file
db_url: Database connection URL
Workflow:
1. Create new database session for this thread
2. Update scan status to 'running'
3. Execute scanner
4. Generate output files (JSON, HTML, ZIP)
5. Save results to database
6. Update status to 'completed' or 'failed'
"""
logger.info(f"Starting background scan execution: scan_id={scan_id}, config={config_file}")
# Create new database session for this thread
engine = create_engine(db_url, echo=False)
Session = sessionmaker(bind=engine)
session = Session()
try:
# Get scan record
scan = session.query(Scan).filter_by(id=scan_id).first()
if not scan:
logger.error(f"Scan {scan_id} not found in database")
return
# Update status to running (in case it wasn't already)
scan.status = 'running'
scan.started_at = datetime.utcnow()
session.commit()
logger.info(f"Scan {scan_id}: Initializing scanner with config {config_file}")
# Initialize scanner
scanner = SneakyScanner(config_file)
# Execute scan
logger.info(f"Scan {scan_id}: Running scanner...")
start_time = datetime.utcnow()
report, timestamp = scanner.scan()
end_time = datetime.utcnow()
scan_duration = (end_time - start_time).total_seconds()
logger.info(f"Scan {scan_id}: Scanner completed in {scan_duration:.2f} seconds")
# Generate output files (JSON, HTML, ZIP)
logger.info(f"Scan {scan_id}: Generating output files...")
scanner.generate_outputs(report, timestamp)
# Save results to database
logger.info(f"Scan {scan_id}: Saving results to database...")
scan_service = ScanService(session)
scan_service._save_scan_to_db(report, scan_id, status='completed')
logger.info(f"Scan {scan_id}: Completed successfully")
except FileNotFoundError as e:
# Config file not found
error_msg = f"Configuration file not found: {str(e)}"
logger.error(f"Scan {scan_id}: {error_msg}")
scan = session.query(Scan).filter_by(id=scan_id).first()
if scan:
scan.status = 'failed'
scan.error_message = error_msg
scan.completed_at = datetime.utcnow()
session.commit()
except Exception as e:
# Any other error during scan execution
error_msg = f"Scan execution failed: {str(e)}"
logger.error(f"Scan {scan_id}: {error_msg}")
logger.error(f"Scan {scan_id}: Traceback:\n{traceback.format_exc()}")
try:
scan = session.query(Scan).filter_by(id=scan_id).first()
if scan:
scan.status = 'failed'
scan.error_message = error_msg
scan.completed_at = datetime.utcnow()
session.commit()
except Exception as db_error:
logger.error(f"Scan {scan_id}: Failed to update error status in database: {str(db_error)}")
finally:
# Always close the session
session.close()
logger.info(f"Scan {scan_id}: Background job completed, session closed")
def get_scan_status_from_db(scan_id: int, db_url: str) -> dict:
"""
Helper function to get scan status directly from database.
Useful for monitoring background jobs without needing Flask app context.
Args:
scan_id: Scan ID to check
db_url: Database connection URL
Returns:
Dictionary with scan status information
"""
engine = create_engine(db_url, echo=False)
Session = sessionmaker(bind=engine)
session = Session()
try:
scan = session.query(Scan).filter_by(id=scan_id).first()
if not scan:
return None
return {
'scan_id': scan.id,
'status': scan.status,
'timestamp': scan.timestamp.isoformat() if scan.timestamp else None,
'duration': scan.duration,
'error_message': scan.error_message
}
finally:
session.close()