""" Background scan job execution. This module handles the execution of scans in background threads, updating database status and handling errors. """ import logging import traceback from datetime import datetime from pathlib import Path from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from src.scanner import SneakyScanner from web.models import Scan from web.services.scan_service import ScanService logger = logging.getLogger(__name__) def execute_scan(scan_id: int, config_file: str, db_url: str): """ Execute a scan in the background. This function is designed to run in a background thread via APScheduler. It creates its own database session to avoid conflicts with the main application thread. Args: scan_id: ID of the scan record in database config_file: Path to YAML configuration file db_url: Database connection URL Workflow: 1. Create new database session for this thread 2. Update scan status to 'running' 3. Execute scanner 4. Generate output files (JSON, HTML, ZIP) 5. Save results to database 6. Update status to 'completed' or 'failed' """ logger.info(f"Starting background scan execution: scan_id={scan_id}, config={config_file}") # Create new database session for this thread engine = create_engine(db_url, echo=False) Session = sessionmaker(bind=engine) session = Session() try: # Get scan record scan = session.query(Scan).filter_by(id=scan_id).first() if not scan: logger.error(f"Scan {scan_id} not found in database") return # Update status to running (in case it wasn't already) scan.status = 'running' scan.started_at = datetime.utcnow() session.commit() logger.info(f"Scan {scan_id}: Initializing scanner with config {config_file}") # Initialize scanner scanner = SneakyScanner(config_file) # Execute scan logger.info(f"Scan {scan_id}: Running scanner...") start_time = datetime.utcnow() report, timestamp = scanner.scan() end_time = datetime.utcnow() scan_duration = (end_time - start_time).total_seconds() logger.info(f"Scan {scan_id}: Scanner completed in {scan_duration:.2f} seconds") # Generate output files (JSON, HTML, ZIP) logger.info(f"Scan {scan_id}: Generating output files...") scanner.generate_outputs(report, timestamp) # Save results to database logger.info(f"Scan {scan_id}: Saving results to database...") scan_service = ScanService(session) scan_service._save_scan_to_db(report, scan_id, status='completed') logger.info(f"Scan {scan_id}: Completed successfully") except FileNotFoundError as e: # Config file not found error_msg = f"Configuration file not found: {str(e)}" logger.error(f"Scan {scan_id}: {error_msg}") scan = session.query(Scan).filter_by(id=scan_id).first() if scan: scan.status = 'failed' scan.error_message = error_msg scan.completed_at = datetime.utcnow() session.commit() except Exception as e: # Any other error during scan execution error_msg = f"Scan execution failed: {str(e)}" logger.error(f"Scan {scan_id}: {error_msg}") logger.error(f"Scan {scan_id}: Traceback:\n{traceback.format_exc()}") try: scan = session.query(Scan).filter_by(id=scan_id).first() if scan: scan.status = 'failed' scan.error_message = error_msg scan.completed_at = datetime.utcnow() session.commit() except Exception as db_error: logger.error(f"Scan {scan_id}: Failed to update error status in database: {str(db_error)}") finally: # Always close the session session.close() logger.info(f"Scan {scan_id}: Background job completed, session closed") def get_scan_status_from_db(scan_id: int, db_url: str) -> dict: """ Helper function to get scan status directly from database. Useful for monitoring background jobs without needing Flask app context. Args: scan_id: Scan ID to check db_url: Database connection URL Returns: Dictionary with scan status information """ engine = create_engine(db_url, echo=False) Session = sessionmaker(bind=engine) session = Session() try: scan = session.query(Scan).filter_by(id=scan_id).first() if not scan: return None return { 'scan_id': scan.id, 'status': scan.status, 'timestamp': scan.timestamp.isoformat() if scan.timestamp else None, 'duration': scan.duration, 'error_message': scan.error_message } finally: session.close()