restructure of dirs, huge docs update

Committed 2025-11-17 16:29:14 -06:00
parent 456e052389
commit cd840cb8ca
87 changed files with 2827 additions and 1094 deletions

View File: web/services/__init__.py

@@ -0,0 +1,10 @@
"""
Services package for SneakyScanner web application.
This package contains business logic layer services that orchestrate
operations between API endpoints and database models.
"""
from web.services.scan_service import ScanService
__all__ = ['ScanService']

View File: web/services/config_service.py

@@ -0,0 +1,552 @@
"""
Config Service - Business logic for config file management
This service handles all operations related to scan configuration files,
including creation, validation, listing, and deletion.
"""
import ipaddress
import logging
import os
import re
import yaml
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
from werkzeug.utils import secure_filename
logger = logging.getLogger(__name__)
class ConfigService:
"""Business logic for config management"""
def __init__(self, configs_dir: str = '/app/configs'):
"""
Initialize the config service.
Args:
configs_dir: Directory where config files are stored
"""
self.configs_dir = configs_dir
# Ensure configs directory exists
os.makedirs(self.configs_dir, exist_ok=True)
def list_configs(self) -> List[Dict[str, Any]]:
"""
List all config files with metadata.
Returns:
List of config metadata dictionaries:
[
{
"filename": "prod-scan.yaml",
"title": "Prod Scan",
"path": "/app/configs/prod-scan.yaml",
"created_at": "2025-11-15T10:30:00Z",
"size_bytes": 1234,
"used_by_schedules": ["Daily Scan", "Weekly Audit"]
}
]
"""
configs = []
# Get all YAML files in configs directory
if not os.path.exists(self.configs_dir):
return configs
for filename in os.listdir(self.configs_dir):
if not filename.endswith(('.yaml', '.yml')):
continue
filepath = os.path.join(self.configs_dir, filename)
if not os.path.isfile(filepath):
continue
try:
                # File metadata; st_mtime (last modification) serves as a proxy for creation time
                stat_info = os.stat(filepath)
                created_at = datetime.fromtimestamp(stat_info.st_mtime, tz=timezone.utc).isoformat().replace('+00:00', 'Z')
size_bytes = stat_info.st_size
                # Parse YAML to get title (fall back to the filename)
                title = filename
try:
with open(filepath, 'r') as f:
data = yaml.safe_load(f)
if isinstance(data, dict):
title = data.get('title', filename)
except Exception:
title = filename # Fallback to filename if parsing fails
# Get schedules using this config
used_by_schedules = self.get_schedules_using_config(filename)
configs.append({
'filename': filename,
'title': title,
'path': filepath,
'created_at': created_at,
'size_bytes': size_bytes,
'used_by_schedules': used_by_schedules
})
            except Exception:
# Skip files that can't be read
continue
# Sort by created_at (most recent first)
configs.sort(key=lambda x: x['created_at'], reverse=True)
return configs
def get_config(self, filename: str) -> Dict[str, Any]:
"""
Get config file content and parsed data.
Args:
filename: Config filename
Returns:
{
"filename": "prod-scan.yaml",
"content": "title: Prod Scan\n...",
"parsed": {"title": "Prod Scan", "sites": [...]}
}
Raises:
FileNotFoundError: If config doesn't exist
ValueError: If config content is invalid
"""
        # Reject path components to prevent directory traversal
        if os.path.basename(filename) != filename:
            raise ValueError(f"Invalid config filename: '{filename}'")
        filepath = os.path.join(self.configs_dir, filename)
if not os.path.exists(filepath):
raise FileNotFoundError(f"Config file '{filename}' not found")
# Read file content
with open(filepath, 'r') as f:
content = f.read()
# Parse YAML
try:
parsed = yaml.safe_load(content)
except yaml.YAMLError as e:
raise ValueError(f"Invalid YAML syntax: {str(e)}")
return {
'filename': filename,
'content': content,
'parsed': parsed
}
def create_from_yaml(self, filename: str, content: str) -> str:
"""
Create config from YAML content.
Args:
filename: Desired filename (will be sanitized)
content: YAML content string
Returns:
Final filename (sanitized)
Raises:
ValueError: If content invalid or filename conflict
"""
        # Sanitize filename (secure_filename() may return an empty string)
        filename = secure_filename(filename)
        if not filename:
            raise ValueError("Filename is empty after sanitization")
        # Ensure .yaml extension
        if not filename.endswith(('.yaml', '.yml')):
            filename += '.yaml'
filepath = os.path.join(self.configs_dir, filename)
# Check for conflicts
if os.path.exists(filepath):
raise ValueError(f"Config file '{filename}' already exists")
# Parse and validate YAML
try:
parsed = yaml.safe_load(content)
except yaml.YAMLError as e:
raise ValueError(f"Invalid YAML syntax: {str(e)}")
# Validate config structure
is_valid, error_msg = self.validate_config_content(parsed)
if not is_valid:
raise ValueError(f"Invalid config structure: {error_msg}")
# Write file
with open(filepath, 'w') as f:
f.write(content)
return filename
def create_from_cidr(
self,
title: str,
cidr: str,
site_name: Optional[str] = None,
ping_default: bool = False
) -> Tuple[str, str]:
"""
Create config from CIDR range.
Args:
title: Scan configuration title
cidr: CIDR range (e.g., "10.0.0.0/24")
site_name: Optional site name (defaults to "Site 1")
ping_default: Default ping expectation for all IPs
Returns:
Tuple of (final_filename, yaml_content)
Raises:
ValueError: If CIDR invalid or other validation errors
"""
# Validate and parse CIDR
try:
network = ipaddress.ip_network(cidr, strict=False)
except ValueError as e:
raise ValueError(f"Invalid CIDR range: {str(e)}")
# Check if network is too large (prevent expansion of huge ranges)
if network.num_addresses > 10000:
raise ValueError(f"CIDR range too large: {network.num_addresses} addresses. Maximum is 10,000.")
# Expand CIDR to list of IP addresses
ip_list = [str(ip) for ip in network.hosts()]
# If network has only 1 address (like /32 or /128), hosts() returns empty
# In that case, use the network address itself
if not ip_list:
ip_list = [str(network.network_address)]
# Build site name
if not site_name or not site_name.strip():
site_name = "Site 1"
# Build IP configurations
ips = []
for ip_address in ip_list:
ips.append({
'address': ip_address,
'expected': {
'ping': ping_default,
'tcp_ports': [],
'udp_ports': []
}
})
# Build YAML structure
config_data = {
'title': title.strip(),
'sites': [
{
'name': site_name.strip(),
'ips': ips
}
]
}
# Convert to YAML string
yaml_content = yaml.dump(config_data, sort_keys=False, default_flow_style=False)
# Generate filename from title
filename = self.generate_filename_from_title(title)
filepath = os.path.join(self.configs_dir, filename)
# Check for conflicts
if os.path.exists(filepath):
raise ValueError(f"Config file '{filename}' already exists")
# Write file
with open(filepath, 'w') as f:
f.write(yaml_content)
return filename, yaml_content
def update_config(self, filename: str, yaml_content: str) -> None:
"""
Update existing config file with new YAML content.
Args:
filename: Config filename to update
yaml_content: New YAML content string
Raises:
FileNotFoundError: If config doesn't exist
ValueError: If YAML content is invalid
"""
        # Reject path components to prevent directory traversal
        if os.path.basename(filename) != filename:
            raise ValueError(f"Invalid config filename: '{filename}'")
        filepath = os.path.join(self.configs_dir, filename)
# Check if file exists
if not os.path.exists(filepath):
raise FileNotFoundError(f"Config file '{filename}' not found")
# Parse and validate YAML
try:
parsed = yaml.safe_load(yaml_content)
except yaml.YAMLError as e:
raise ValueError(f"Invalid YAML syntax: {str(e)}")
# Validate config structure
is_valid, error_msg = self.validate_config_content(parsed)
if not is_valid:
raise ValueError(f"Invalid config structure: {error_msg}")
# Write updated content
with open(filepath, 'w') as f:
f.write(yaml_content)
def delete_config(self, filename: str) -> None:
"""
Delete config file and cascade delete any associated schedules.
When a config is deleted, all schedules using that config (both enabled
and disabled) are automatically deleted as well, since they would be
invalid without the config file.
Args:
filename: Config filename to delete
Raises:
FileNotFoundError: If config doesn't exist
"""
        # Reject path components to prevent directory traversal
        if os.path.basename(filename) != filename:
            raise ValueError(f"Invalid config filename: '{filename}'")
        filepath = os.path.join(self.configs_dir, filename)
if not os.path.exists(filepath):
raise FileNotFoundError(f"Config file '{filename}' not found")
# Delete any schedules using this config (both enabled and disabled)
try:
from web.services.schedule_service import ScheduleService
from flask import current_app
# Get database session from Flask app
db = current_app.db_session
# Get all schedules
schedule_service = ScheduleService(db)
result = schedule_service.list_schedules(page=1, per_page=10000)
schedules = result.get('schedules', [])
# Build full path for comparison
config_path = os.path.join(self.configs_dir, filename)
# Find and delete all schedules using this config (enabled or disabled)
deleted_schedules = []
for schedule in schedules:
schedule_config = schedule.get('config_file', '')
# Handle both absolute paths and just filenames
if schedule_config == filename or schedule_config == config_path:
schedule_id = schedule.get('id')
schedule_name = schedule.get('name', 'Unknown')
try:
schedule_service.delete_schedule(schedule_id)
deleted_schedules.append(schedule_name)
except Exception as e:
                            logger.warning(
                                f"Failed to delete schedule {schedule_id} ('{schedule_name}'): {e}"
                            )
if deleted_schedules:
                logger.info(
                    f"Cascade deleted {len(deleted_schedules)} schedule(s) associated with config '{filename}': {', '.join(deleted_schedules)}"
                )
except ImportError:
# If ScheduleService doesn't exist yet, skip schedule deletion
pass
except Exception as e:
# Log error but continue with config deletion
            logger.error(
                f"Error deleting schedules for config {filename}: {e}", exc_info=True
            )
# Delete file
os.remove(filepath)
def validate_config_content(self, content: Dict) -> Tuple[bool, str]:
"""
Validate parsed YAML config structure.
Args:
content: Parsed YAML config as dict
Returns:
Tuple of (is_valid, error_message)
"""
if not isinstance(content, dict):
return False, "Config must be a dictionary/object"
# Check required fields
if 'title' not in content:
return False, "Missing required field: 'title'"
if 'sites' not in content:
return False, "Missing required field: 'sites'"
# Validate title
if not isinstance(content['title'], str) or not content['title'].strip():
return False, "Field 'title' must be a non-empty string"
# Validate sites
sites = content['sites']
if not isinstance(sites, list):
return False, "Field 'sites' must be a list"
if len(sites) == 0:
return False, "Must have at least one site defined"
# Validate each site
for i, site in enumerate(sites):
if not isinstance(site, dict):
return False, f"Site {i+1} must be a dictionary/object"
if 'name' not in site:
return False, f"Site {i+1} missing required field: 'name'"
if 'ips' not in site:
return False, f"Site {i+1} missing required field: 'ips'"
if not isinstance(site['ips'], list):
return False, f"Site {i+1} field 'ips' must be a list"
if len(site['ips']) == 0:
return False, f"Site {i+1} must have at least one IP"
# Validate each IP
for j, ip_config in enumerate(site['ips']):
if not isinstance(ip_config, dict):
return False, f"Site {i+1} IP {j+1} must be a dictionary/object"
if 'address' not in ip_config:
return False, f"Site {i+1} IP {j+1} missing required field: 'address'"
if 'expected' not in ip_config:
return False, f"Site {i+1} IP {j+1} missing required field: 'expected'"
if not isinstance(ip_config['expected'], dict):
return False, f"Site {i+1} IP {j+1} field 'expected' must be a dictionary/object"
return True, ""
def get_schedules_using_config(self, filename: str) -> List[str]:
"""
        Get names of enabled schedules that use this config.
Args:
filename: Config filename
Returns:
List of schedule names (e.g., ["Daily Scan", "Weekly Audit"])
"""
# Import here to avoid circular dependency
try:
from web.services.schedule_service import ScheduleService
from flask import current_app
# Get database session from Flask app
db = current_app.db_session
# Get all schedules (use large per_page to get all)
schedule_service = ScheduleService(db)
result = schedule_service.list_schedules(page=1, per_page=10000)
# Extract schedules list from paginated result
schedules = result.get('schedules', [])
# Build full path for comparison
config_path = os.path.join(self.configs_dir, filename)
# Find schedules using this config (only enabled schedules)
using_schedules = []
for schedule in schedules:
schedule_config = schedule.get('config_file', '')
# Handle both absolute paths and just filenames
if schedule_config == filename or schedule_config == config_path:
# Only count enabled schedules
if schedule.get('enabled', False):
using_schedules.append(schedule.get('name', 'Unknown'))
return using_schedules
except ImportError:
# If ScheduleService doesn't exist yet, return empty list
return []
except Exception as e:
# If any error occurs, return empty list (safer than failing)
# Log the error for debugging
            logger.error(f"Error getting schedules using config {filename}: {e}", exc_info=True)
return []
def generate_filename_from_title(self, title: str) -> str:
"""
Generate safe filename from scan title.
Args:
title: Scan title string
Returns:
Safe filename (e.g., "Prod Scan 2025" -> "prod-scan-2025.yaml")
"""
# Convert to lowercase
filename = title.lower()
# Replace spaces with hyphens
filename = filename.replace(' ', '-')
# Remove special characters (keep only alphanumeric, hyphens, underscores)
filename = re.sub(r'[^a-z0-9\-_]', '', filename)
# Remove consecutive hyphens
filename = re.sub(r'-+', '-', filename)
# Remove leading/trailing hyphens
filename = filename.strip('-')
# Limit length (max 200 chars, reserve 5 for .yaml)
max_length = 195
if len(filename) > max_length:
filename = filename[:max_length]
# Ensure not empty
if not filename:
filename = 'config'
# Add .yaml extension
filename += '.yaml'
return filename
def get_config_path(self, filename: str) -> str:
"""
Get absolute path for a config file.
Args:
filename: Config filename
Returns:
Absolute path to config file
"""
return os.path.join(self.configs_dir, filename)
def config_exists(self, filename: str) -> bool:
"""
Check if a config file exists.
Args:
filename: Config filename
Returns:
True if file exists, False otherwise
"""
filepath = os.path.join(self.configs_dir, filename)
return os.path.exists(filepath) and os.path.isfile(filepath)
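
The CIDR expansion in create_from_cidr() is easy to exercise on its own. Below is a minimal standalone sketch of the same logic, assuming only the standard library and PyYAML; the service additionally derives a slug filename from the title, checks for conflicts, and writes the result into the configs directory.

# Standalone sketch of the expansion create_from_cidr() performs
# (requires PyYAML: pip install pyyaml).
import ipaddress
import yaml

network = ipaddress.ip_network("10.0.0.0/30", strict=False)

# hosts() excludes the network and broadcast addresses; for a /32 (or /128)
# it yields nothing, so fall back to the network address itself.
ip_list = [str(ip) for ip in network.hosts()] or [str(network.network_address)]

config_data = {
    'title': 'Prod Scan',
    'sites': [{
        'name': 'Site 1',
        'ips': [
            {'address': ip, 'expected': {'ping': False, 'tcp_ports': [], 'udp_ports': []}}
            for ip in ip_list
        ],
    }],
}

print(yaml.dump(config_data, sort_keys=False, default_flow_style=False))
# title: Prod Scan
# sites:
# - name: Site 1
#   ips:
#   - address: 10.0.0.1
#     expected:
#       ping: false
#       tcp_ports: []
#       udp_ports: []
#   - address: 10.0.0.2
#     ...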

File diff suppressed because it is too large

View File: web/services/schedule_service.py

@@ -0,0 +1,483 @@
"""
Schedule service for managing scheduled scan operations.
This service handles the business logic for creating, updating, and managing
scheduled scans with cron expressions.
"""
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from croniter import croniter
from sqlalchemy.orm import Session
from web.models import Schedule, Scan
from web.utils.pagination import paginate
logger = logging.getLogger(__name__)
class ScheduleService:
"""
Service for managing scheduled scans.
Handles schedule lifecycle: creation, validation, updating,
and cron expression processing.
"""
def __init__(self, db_session: Session):
"""
Initialize schedule service.
Args:
db_session: SQLAlchemy database session
"""
self.db = db_session
def create_schedule(
self,
name: str,
config_file: str,
cron_expression: str,
enabled: bool = True
) -> int:
"""
Create a new schedule.
Args:
name: Human-readable schedule name
config_file: Path to YAML configuration file
cron_expression: Cron expression (e.g., '0 2 * * *')
enabled: Whether schedule is active
Returns:
Schedule ID of the created schedule
Raises:
ValueError: If cron expression is invalid or config file doesn't exist
"""
# Validate cron expression
is_valid, error_msg = self.validate_cron_expression(cron_expression)
if not is_valid:
raise ValueError(f"Invalid cron expression: {error_msg}")
# Validate config file exists
# If config_file is just a filename, prepend the configs directory
if not config_file.startswith('/'):
config_file_path = os.path.join('/app/configs', config_file)
else:
config_file_path = config_file
if not os.path.isfile(config_file_path):
raise ValueError(f"Config file not found: {config_file}")
# Calculate next run time
next_run = self.calculate_next_run(cron_expression) if enabled else None
# Create schedule record
schedule = Schedule(
name=name,
config_file=config_file,
cron_expression=cron_expression,
enabled=enabled,
last_run=None,
next_run=next_run,
created_at=datetime.utcnow(),
updated_at=datetime.utcnow()
)
self.db.add(schedule)
self.db.commit()
self.db.refresh(schedule)
logger.info(f"Schedule {schedule.id} created: '{name}' with cron '{cron_expression}'")
return schedule.id
def get_schedule(self, schedule_id: int) -> Dict[str, Any]:
"""
Get schedule details by ID.
Args:
schedule_id: Schedule ID
Returns:
Schedule dictionary with details and execution history
Raises:
ValueError: If schedule not found
"""
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
if not schedule:
raise ValueError(f"Schedule {schedule_id} not found")
# Convert to dict and include history
schedule_dict = self._schedule_to_dict(schedule)
schedule_dict['history'] = self.get_schedule_history(schedule_id, limit=10)
return schedule_dict
def list_schedules(
self,
page: int = 1,
per_page: int = 20,
enabled_filter: Optional[bool] = None
) -> Dict[str, Any]:
"""
List all schedules with pagination and filtering.
Args:
page: Page number (1-indexed)
per_page: Items per page
enabled_filter: Filter by enabled status (None = all)
Returns:
Dictionary with paginated schedules:
{
'schedules': [...],
'total': int,
'page': int,
'per_page': int,
'pages': int
}
"""
# Build query
query = self.db.query(Schedule)
# Apply filter
if enabled_filter is not None:
query = query.filter(Schedule.enabled == enabled_filter)
# Order by next_run (nulls last), then by name
query = query.order_by(Schedule.next_run.is_(None), Schedule.next_run, Schedule.name)
# Paginate
result = paginate(query, page=page, per_page=per_page)
# Convert schedules to dicts
schedules = [self._schedule_to_dict(s) for s in result.items]
return {
'schedules': schedules,
'total': result.total,
'page': result.page,
'per_page': result.per_page,
'pages': result.pages
}
def update_schedule(
self,
schedule_id: int,
**updates: Any
) -> Dict[str, Any]:
"""
Update schedule fields.
Args:
schedule_id: Schedule ID
**updates: Fields to update (name, config_file, cron_expression, enabled)
Returns:
Updated schedule dictionary
Raises:
ValueError: If schedule not found or invalid updates
"""
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
if not schedule:
raise ValueError(f"Schedule {schedule_id} not found")
# Validate cron expression if being updated
if 'cron_expression' in updates:
is_valid, error_msg = self.validate_cron_expression(updates['cron_expression'])
if not is_valid:
raise ValueError(f"Invalid cron expression: {error_msg}")
# Recalculate next_run
if schedule.enabled or updates.get('enabled', False):
updates['next_run'] = self.calculate_next_run(updates['cron_expression'])
# Validate config file if being updated
if 'config_file' in updates:
config_file = updates['config_file']
# If config_file is just a filename, prepend the configs directory
if not config_file.startswith('/'):
config_file_path = os.path.join('/app/configs', config_file)
else:
config_file_path = config_file
if not os.path.isfile(config_file_path):
raise ValueError(f"Config file not found: {updates['config_file']}")
# Handle enabled toggle
if 'enabled' in updates:
if updates['enabled'] and not schedule.enabled:
# Being enabled - calculate next_run
cron_expr = updates.get('cron_expression', schedule.cron_expression)
updates['next_run'] = self.calculate_next_run(cron_expr)
elif not updates['enabled'] and schedule.enabled:
# Being disabled - clear next_run
updates['next_run'] = None
# Update fields
for key, value in updates.items():
if hasattr(schedule, key):
setattr(schedule, key, value)
schedule.updated_at = datetime.utcnow()
self.db.commit()
self.db.refresh(schedule)
logger.info(f"Schedule {schedule_id} updated: {list(updates.keys())}")
return self._schedule_to_dict(schedule)
def delete_schedule(self, schedule_id: int) -> bool:
"""
Delete a schedule.
Note: Associated scans are NOT deleted (schedule_id becomes null).
Args:
schedule_id: Schedule ID
Returns:
True if deleted successfully
Raises:
ValueError: If schedule not found
"""
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
if not schedule:
raise ValueError(f"Schedule {schedule_id} not found")
schedule_name = schedule.name
self.db.delete(schedule)
self.db.commit()
logger.info(f"Schedule {schedule_id} ('{schedule_name}') deleted")
return True
def toggle_enabled(self, schedule_id: int, enabled: bool) -> Dict[str, Any]:
"""
Enable or disable a schedule.
Args:
schedule_id: Schedule ID
enabled: New enabled status
Returns:
Updated schedule dictionary
Raises:
ValueError: If schedule not found
"""
return self.update_schedule(schedule_id, enabled=enabled)
def update_run_times(
self,
schedule_id: int,
last_run: datetime,
next_run: datetime
) -> bool:
"""
Update last_run and next_run timestamps.
Called after each execution.
Args:
schedule_id: Schedule ID
last_run: Last execution time
next_run: Next scheduled execution time
Returns:
True if updated successfully
Raises:
ValueError: If schedule not found
"""
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
if not schedule:
raise ValueError(f"Schedule {schedule_id} not found")
schedule.last_run = last_run
schedule.next_run = next_run
schedule.updated_at = datetime.utcnow()
self.db.commit()
logger.debug(f"Schedule {schedule_id} run times updated: last={last_run}, next={next_run}")
return True
def validate_cron_expression(self, cron_expr: str) -> Tuple[bool, Optional[str]]:
"""
Validate a cron expression.
Args:
cron_expr: Cron expression to validate
Returns:
Tuple of (is_valid, error_message)
- (True, None) if valid
- (False, error_message) if invalid
"""
try:
# Try to create a croniter instance
base_time = datetime.utcnow()
cron = croniter(cron_expr, base_time)
# Try to get the next run time (validates the expression)
cron.get_next(datetime)
return (True, None)
except (ValueError, KeyError) as e:
return (False, str(e))
except Exception as e:
return (False, f"Unexpected error: {str(e)}")
def calculate_next_run(
self,
cron_expr: str,
from_time: Optional[datetime] = None
) -> datetime:
"""
Calculate next run time from cron expression.
Args:
cron_expr: Cron expression
from_time: Base time (defaults to now UTC)
Returns:
Next run datetime (UTC)
Raises:
ValueError: If cron expression is invalid
"""
if from_time is None:
from_time = datetime.utcnow()
try:
cron = croniter(cron_expr, from_time)
return cron.get_next(datetime)
except Exception as e:
raise ValueError(f"Invalid cron expression '{cron_expr}': {str(e)}")
def get_schedule_history(
self,
schedule_id: int,
limit: int = 10
) -> List[Dict[str, Any]]:
"""
Get recent scans triggered by this schedule.
Args:
schedule_id: Schedule ID
limit: Maximum number of scans to return
Returns:
List of scan dictionaries (recent first)
"""
scans = (
self.db.query(Scan)
.filter(Scan.schedule_id == schedule_id)
.order_by(Scan.timestamp.desc())
.limit(limit)
.all()
)
return [
{
'id': scan.id,
'timestamp': scan.timestamp.isoformat() if scan.timestamp else None,
'status': scan.status,
'title': scan.title,
'config_file': scan.config_file
}
for scan in scans
]
def _schedule_to_dict(self, schedule: Schedule) -> Dict[str, Any]:
"""
Convert Schedule model to dictionary.
Args:
schedule: Schedule model instance
Returns:
Dictionary representation
"""
return {
'id': schedule.id,
'name': schedule.name,
'config_file': schedule.config_file,
'cron_expression': schedule.cron_expression,
'enabled': schedule.enabled,
'last_run': schedule.last_run.isoformat() if schedule.last_run else None,
'next_run': schedule.next_run.isoformat() if schedule.next_run else None,
'next_run_relative': self._get_relative_time(schedule.next_run) if schedule.next_run else None,
'created_at': schedule.created_at.isoformat() if schedule.created_at else None,
'updated_at': schedule.updated_at.isoformat() if schedule.updated_at else None
}
def _get_relative_time(self, dt: Optional[datetime]) -> Optional[str]:
"""
Format datetime as relative time.
Args:
dt: Datetime to format (UTC)
Returns:
            Human-readable relative time (e.g., "in 2 hours", "3 days ago")
"""
if dt is None:
return None
now = datetime.utcnow()
diff = dt - now
# Future times
if diff.total_seconds() > 0:
seconds = int(diff.total_seconds())
if seconds < 60:
return "in less than a minute"
elif seconds < 3600:
minutes = seconds // 60
return f"in {minutes} minute{'s' if minutes != 1 else ''}"
elif seconds < 86400:
hours = seconds // 3600
return f"in {hours} hour{'s' if hours != 1 else ''}"
elif seconds < 604800:
days = seconds // 86400
return f"in {days} day{'s' if days != 1 else ''}"
else:
weeks = seconds // 604800
return f"in {weeks} week{'s' if weeks != 1 else ''}"
# Past times
else:
seconds = int(-diff.total_seconds())
if seconds < 60:
return "less than a minute ago"
elif seconds < 3600:
minutes = seconds // 60
return f"{minutes} minute{'s' if minutes != 1 else ''} ago"
elif seconds < 86400:
hours = seconds // 3600
return f"{hours} hour{'s' if hours != 1 else ''} ago"
elif seconds < 604800:
days = seconds // 86400
return f"{days} day{'s' if days != 1 else ''} ago"
else:
weeks = seconds // 604800
return f"{weeks} week{'s' if weeks != 1 else ''} ago"

View File: web/services/scheduler_service.py

@@ -0,0 +1,356 @@
"""
Scheduler service for managing background jobs and scheduled scans.
This service integrates APScheduler with Flask to enable background
scan execution and future scheduled scanning capabilities.
"""
import logging
from datetime import datetime
from typing import Optional
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.executors.pool import ThreadPoolExecutor
from flask import Flask
from web.jobs.scan_job import execute_scan
logger = logging.getLogger(__name__)
class SchedulerService:
"""
Service for managing background job scheduling.
Uses APScheduler's BackgroundScheduler to run scans asynchronously
without blocking HTTP requests.
"""
def __init__(self):
"""Initialize scheduler service (scheduler not started yet)."""
self.scheduler: Optional[BackgroundScheduler] = None
self.db_url: Optional[str] = None
def init_scheduler(self, app: Flask):
"""
Initialize and start APScheduler with Flask app.
Args:
app: Flask application instance
Configuration:
- BackgroundScheduler: Runs in separate thread
- ThreadPoolExecutor: Allows concurrent scan execution
- Max workers: 3 (configurable via SCHEDULER_MAX_WORKERS)
"""
if self.scheduler:
logger.warning("Scheduler already initialized")
return
# Store database URL for passing to background jobs
self.db_url = app.config['SQLALCHEMY_DATABASE_URI']
# Configure executor for concurrent jobs
max_workers = app.config.get('SCHEDULER_MAX_WORKERS', 3)
executors = {
'default': ThreadPoolExecutor(max_workers=max_workers)
}
# Configure job defaults
job_defaults = {
'coalesce': True, # Combine multiple pending instances into one
'max_instances': app.config.get('SCHEDULER_MAX_INSTANCES', 3),
'misfire_grace_time': 60 # Allow 60 seconds for delayed starts
}
# Create scheduler with local system timezone
# This allows users to schedule jobs using their local time
# APScheduler will automatically use the system's local timezone
self.scheduler = BackgroundScheduler(
executors=executors,
job_defaults=job_defaults
# timezone defaults to local system timezone
)
# Start scheduler
self.scheduler.start()
logger.info(f"APScheduler started with {max_workers} max workers")
# Register shutdown handler
import atexit
atexit.register(lambda: self.shutdown())
def shutdown(self):
"""
Shutdown scheduler gracefully.
Waits for running jobs to complete before shutting down.
"""
if self.scheduler:
logger.info("Shutting down APScheduler...")
self.scheduler.shutdown(wait=True)
logger.info("APScheduler shutdown complete")
self.scheduler = None
def load_schedules_on_startup(self):
"""
Load all enabled schedules from database and register with APScheduler.
    Should be called after init_scheduler() to restore scheduled jobs
    that were active when the application last shut down.
Raises:
RuntimeError: If scheduler not initialized
"""
if not self.scheduler:
raise RuntimeError("Scheduler not initialized. Call init_scheduler() first.")
# Import here to avoid circular imports
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from web.models import Schedule
try:
# Create database session
engine = create_engine(self.db_url)
Session = sessionmaker(bind=engine)
session = Session()
try:
# Query all enabled schedules
enabled_schedules = (
session.query(Schedule)
.filter(Schedule.enabled == True)
.all()
)
logger.info(f"Loading {len(enabled_schedules)} enabled schedules on startup")
# Register each schedule with APScheduler
for schedule in enabled_schedules:
try:
self.add_scheduled_scan(
schedule_id=schedule.id,
config_file=schedule.config_file,
cron_expression=schedule.cron_expression
)
logger.info(f"Loaded schedule {schedule.id}: '{schedule.name}'")
except Exception as e:
logger.error(
f"Failed to load schedule {schedule.id} ('{schedule.name}'): {str(e)}",
exc_info=True
)
logger.info("Schedule loading complete")
finally:
session.close()
except Exception as e:
logger.error(f"Error loading schedules on startup: {str(e)}", exc_info=True)
def queue_scan(self, scan_id: int, config_file: str) -> str:
"""
Queue a scan for immediate background execution.
Args:
scan_id: Database ID of the scan
config_file: Path to YAML configuration file
Returns:
Job ID from APScheduler
Raises:
RuntimeError: If scheduler not initialized
"""
if not self.scheduler:
raise RuntimeError("Scheduler not initialized. Call init_scheduler() first.")
# Add job to run immediately
job = self.scheduler.add_job(
func=execute_scan,
args=[scan_id, config_file, self.db_url],
id=f'scan_{scan_id}',
name=f'Scan {scan_id}',
replace_existing=True,
misfire_grace_time=300 # 5 minutes
)
logger.info(f"Queued scan {scan_id} for background execution (job_id={job.id})")
return job.id
def add_scheduled_scan(self, schedule_id: int, config_file: str,
cron_expression: str) -> str:
"""
Add a recurring scheduled scan.
Args:
schedule_id: Database ID of the schedule
config_file: Path to YAML configuration file
cron_expression: Cron expression (e.g., "0 2 * * *" for 2am daily)
Returns:
Job ID from APScheduler
Raises:
RuntimeError: If scheduler not initialized
ValueError: If cron expression is invalid
"""
if not self.scheduler:
raise RuntimeError("Scheduler not initialized. Call init_scheduler() first.")
from apscheduler.triggers.cron import CronTrigger
# Create cron trigger from expression using local timezone
# This allows users to specify times in their local timezone
try:
trigger = CronTrigger.from_crontab(cron_expression)
# timezone defaults to local system timezone
except (ValueError, KeyError) as e:
raise ValueError(f"Invalid cron expression '{cron_expression}': {str(e)}")
# Add cron job
job = self.scheduler.add_job(
func=self._trigger_scheduled_scan,
args=[schedule_id],
trigger=trigger,
id=f'schedule_{schedule_id}',
name=f'Schedule {schedule_id}',
replace_existing=True,
max_instances=1 # Only one instance per schedule
)
logger.info(f"Added scheduled scan {schedule_id} with cron '{cron_expression}' (job_id={job.id})")
return job.id
def remove_scheduled_scan(self, schedule_id: int):
"""
Remove a scheduled scan job.
Args:
schedule_id: Database ID of the schedule
Raises:
RuntimeError: If scheduler not initialized
"""
if not self.scheduler:
raise RuntimeError("Scheduler not initialized. Call init_scheduler() first.")
job_id = f'schedule_{schedule_id}'
try:
self.scheduler.remove_job(job_id)
logger.info(f"Removed scheduled scan job: {job_id}")
except Exception as e:
logger.warning(f"Failed to remove scheduled scan job {job_id}: {str(e)}")
def _trigger_scheduled_scan(self, schedule_id: int):
"""
Internal method to trigger a scan from a schedule.
Creates a new scan record and queues it for execution.
Args:
schedule_id: Database ID of the schedule
"""
logger.info(f"Scheduled scan triggered: schedule_id={schedule_id}")
# Import here to avoid circular imports
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from web.services.schedule_service import ScheduleService
from web.services.scan_service import ScanService
try:
# Create database session
engine = create_engine(self.db_url)
Session = sessionmaker(bind=engine)
session = Session()
try:
# Get schedule details
schedule_service = ScheduleService(session)
                # get_schedule() raises ValueError if the schedule does not
                # exist; that case is logged by the outer except block.
                schedule = schedule_service.get_schedule(schedule_id)
if not schedule['enabled']:
logger.warning(f"Schedule {schedule_id} is disabled, skipping execution")
return
# Create and trigger scan
scan_service = ScanService(session)
scan_id = scan_service.trigger_scan(
config_file=schedule['config_file'],
triggered_by='scheduled',
schedule_id=schedule_id,
scheduler=None # Don't pass scheduler to avoid recursion
)
# Queue the scan for execution
self.queue_scan(scan_id, schedule['config_file'])
# Update schedule's last_run and next_run
                from croniter import croniter
                # Note: next_run is computed from naive UTC here, while the
                # APScheduler trigger fires in the local system timezone; the
                # stored value is used for display, not for firing.
                next_run = croniter(schedule['cron_expression'], datetime.utcnow()).get_next(datetime)
schedule_service.update_run_times(
schedule_id=schedule_id,
last_run=datetime.utcnow(),
next_run=next_run
)
logger.info(f"Scheduled scan completed: schedule_id={schedule_id}, scan_id={scan_id}")
finally:
session.close()
except Exception as e:
logger.error(f"Error triggering scheduled scan {schedule_id}: {str(e)}", exc_info=True)
def get_job_status(self, job_id: str) -> Optional[dict]:
"""
Get status of a scheduled job.
Args:
job_id: APScheduler job ID
Returns:
Dictionary with job information, or None if not found
"""
if not self.scheduler:
return None
job = self.scheduler.get_job(job_id)
if not job:
return None
return {
'id': job.id,
'name': job.name,
'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
'trigger': str(job.trigger)
}
def list_jobs(self) -> list:
"""
List all scheduled jobs.
Returns:
List of job information dictionaries
"""
if not self.scheduler:
return []
jobs = self.scheduler.get_jobs()
return [
{
'id': job.id,
'name': job.name,
'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
'trigger': str(job.trigger)
}
for job in jobs
]
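
For reference, the APScheduler wiring above reduces to a handful of calls. Below is a minimal runnable sketch (requires APScheduler: pip install apscheduler) with a stand-in job in place of web.jobs.scan_job.execute_scan.

# Standalone sketch of the scheduler setup used by SchedulerService.
import time
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.executors.pool import ThreadPoolExecutor
from apscheduler.triggers.cron import CronTrigger

def fake_scan(scan_id: int) -> None:
    print(f"running scan {scan_id}")

scheduler = BackgroundScheduler(
    executors={'default': ThreadPoolExecutor(max_workers=3)},
    job_defaults={'coalesce': True, 'max_instances': 3, 'misfire_grace_time': 60},
)
scheduler.start()

# One-off job, as in queue_scan(): with no trigger, the job runs immediately.
scheduler.add_job(fake_scan, args=[1], id='scan_1', replace_existing=True)

# Recurring job, as in add_scheduled_scan(): cron trigger in local system time.
scheduler.add_job(
    fake_scan,
    trigger=CronTrigger.from_crontab('0 2 * * *'),
    args=[2],
    id='schedule_2',
    replace_existing=True,
    max_instances=1,
)

time.sleep(2)              # give the one-off job a moment to fire
scheduler.shutdown(wait=True)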