"""
Background scan job execution.

This module handles the execution of scans in background threads,
updating database status and handling errors.
"""

import logging
import traceback
from datetime import datetime
from pathlib import Path
from typing import Optional

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from src.scanner import SneakyScanner
from web.models import Scan
from web.services.scan_service import ScanService
from web.services.alert_service import AlertService

logger = logging.getLogger(__name__)


def _resolve_config_path(config_file: Optional[str]) -> str:
    """Return the path to hand to the scanner for a YAML config file.

    A value that does not start with '/' is treated as a bare filename and is
    prefixed with '/app/configs/'; anything else is returned unchanged.

    Raises:
        ValueError: If config_file is empty or None, i.e. the caller supplied
            neither a config_id nor a config_file.
    """
    if not config_file:
        raise ValueError("Either config_id or config_file must be provided")
    if not config_file.startswith('/'):
        return f'/app/configs/{config_file}'
    return config_file


def _mark_scan_failed(session, scan_id: int, error_msg: str) -> None:
    """Record a 'failed' status and error message on the scan row.

    Never raises: this runs inside exception handlers of a background job, so
    a failure to persist the status is logged instead of propagated.
    """
    try:
        # The error being handled may have come from a failed flush/commit,
        # which leaves the session unusable until it is rolled back. Rolling
        # back also discards any partially written, uncommitted results.
        session.rollback()
        scan = session.query(Scan).filter_by(id=scan_id).first()
        if scan:
            scan.status = 'failed'
            scan.error_message = error_msg
            scan.completed_at = datetime.utcnow()
            session.commit()
    except Exception as db_error:
        logger.error(f"Scan {scan_id}: Failed to update error status in database: {str(db_error)}")


def execute_scan(scan_id: int, config_file: str = None, config_id: int = None, db_url: str = None):
    """
    Execute a scan in the background.

    This function is designed to run in a background thread via APScheduler.
    It creates its own database engine and session to avoid conflicts with
    the main application thread, and releases both before returning.

    Args:
        scan_id: ID of the scan record in database
        config_file: Path to YAML configuration file (legacy, optional)
        config_id: Database config ID (preferred, optional)
        db_url: Database connection URL

    Note:
        Provide exactly one of config_file or config_id. If neither is given
        the scan is marked 'failed' with an explanatory error message.

    Workflow:
        1. Create new database session for this thread
        2. Update scan status to 'running'
        3. Execute scanner
        4. Generate output files (JSON, HTML, ZIP)
        5. Save results to database
        6. Update status to 'completed' or 'failed'

    Errors raised while scanning are caught, logged and recorded on the scan
    row; they are not re-raised to the scheduler.
    """
    config_desc = f"config_id={config_id}" if config_id else f"config_file={config_file}"
    logger.info(f"Starting background scan execution: scan_id={scan_id}, {config_desc}")

    # Create new database session for this thread
    engine = create_engine(db_url, echo=False)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Get scan record
        scan = session.query(Scan).filter_by(id=scan_id).first()
        if not scan:
            logger.error(f"Scan {scan_id} not found in database")
            return

        # Update status to running (in case it wasn't already)
        scan.status = 'running'
        scan.started_at = datetime.utcnow()
        session.commit()

        logger.info(f"Scan {scan_id}: Initializing scanner with {config_desc}")

        # Initialize scanner based on config type
        if config_id:
            # Use database config
            scanner = SneakyScanner(config_id=config_id)
        else:
            # Use YAML config file (bare filenames live under /app/configs)
            scanner = SneakyScanner(config_path=_resolve_config_path(config_file))

        # Execute scan
        logger.info(f"Scan {scan_id}: Running scanner...")
        start_time = datetime.utcnow()
        report, timestamp = scanner.scan()
        end_time = datetime.utcnow()

        scan_duration = (end_time - start_time).total_seconds()
        logger.info(f"Scan {scan_id}: Scanner completed in {scan_duration:.2f} seconds")

        # Generate output files (JSON, HTML, ZIP)
        logger.info(f"Scan {scan_id}: Generating output files...")
        scanner.generate_outputs(report, timestamp)

        # Save results to database
        logger.info(f"Scan {scan_id}: Saving results to database...")
        scan_service = ScanService(session)
        scan_service._save_scan_to_db(report, scan_id, status='completed')

        # Evaluate alert rules
        logger.info(f"Scan {scan_id}: Evaluating alert rules...")
        try:
            alert_service = AlertService(session)
            alerts_triggered = alert_service.evaluate_alert_rules(scan_id)
            logger.info(f"Scan {scan_id}: {len(alerts_triggered)} alerts triggered")
        except Exception as e:
            # Don't fail the scan if alert evaluation fails
            logger.error(f"Scan {scan_id}: Alert evaluation failed: {str(e)}")
            logger.debug(f"Alert evaluation error details: {traceback.format_exc()}")

        logger.info(f"Scan {scan_id}: Completed successfully")

    except FileNotFoundError as e:
        # Config file not found
        error_msg = f"Configuration file not found: {str(e)}"
        logger.error(f"Scan {scan_id}: {error_msg}")
        _mark_scan_failed(session, scan_id, error_msg)

    except Exception as e:
        # Any other error during scan execution
        error_msg = f"Scan execution failed: {str(e)}"
        logger.error(f"Scan {scan_id}: {error_msg}")
        logger.error(f"Scan {scan_id}: Traceback:\n{traceback.format_exc()}")
        _mark_scan_failed(session, scan_id, error_msg)

    finally:
        # Always close the session, then dispose of the per-job engine so its
        # connection pool does not outlive this background job.
        try:
            session.close()
        finally:
            engine.dispose()
        logger.info(f"Scan {scan_id}: Background job completed, session closed")


def get_scan_status_from_db(scan_id: int, db_url: str) -> Optional[dict]:
    """
    Helper function to get scan status directly from database.

    Useful for monitoring background jobs without needing Flask app context.
    A dedicated engine and session are created for the lookup and released
    before returning.

    Args:
        scan_id: Scan ID to check
        db_url: Database connection URL

    Returns:
        Dictionary with keys 'scan_id', 'status', 'timestamp' (ISO-8601 string
        or None), 'duration' and 'error_message'; None if no scan with the
        given ID exists.
    """
    engine = create_engine(db_url, echo=False)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        scan = session.query(Scan).filter_by(id=scan_id).first()
        if not scan:
            return None

        return {
            'scan_id': scan.id,
            'status': scan.status,
            'timestamp': scan.timestamp.isoformat() if scan.timestamp else None,
            'duration': scan.duration,
            'error_message': scan.error_message
        }
    finally:
        # Release the session first, then the engine's connection pool.
        try:
            session.close()
        finally:
            engine.dispose()