Merge pull request 'beta' (#10 ) from beta into master

Reviewed-on: #10
Merge pull request 'nightly' (#9 ) from nightly into beta
2025-11-25 20:49:46 +00:00 · 2025-11-25 20:49:25 +00:00 · 2025-11-25 14:48:00 -06:00 · 2025-11-25 14:47:36 -06:00 · 2025-11-24 12:54:58 -06:00 · 2025-11-24 12:54:33 -06:00
13 changed files with 496 additions and 187 deletions
--- a/.env.example
+++ b/.env.example
--- a/app/src/scanner.py
+++ b/app/src/scanner.py
@@ -676,29 +676,57 @@ class SneakyScanner:
        return services
-    def _is_likely_web_service(self, service: Dict) -> bool:
+    def _is_likely_web_service(self, service: Dict, ip: str = None) -> bool:
        """
-        Check if a service is likely HTTP/HTTPS based on nmap detection or common web ports
+        Check if a service is a web server by actually making an HTTP request
        Args:
            service: Service dictionary from nmap results
            ip: IP address to test (required for HTTP probe)
        Returns:
-            True if service appears to be web-related
+            True if service responds to HTTP/HTTPS requests
        """
-        # Check service name
+        import requests
        import urllib3
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        # Quick check for known web service names first
        web_services = ['http', 'https', 'ssl', 'http-proxy', 'https-alt',
                       'http-alt', 'ssl/http', 'ssl/https']
        service_name = service.get('service', '').lower()
-        if service_name in web_services:
+        # If no IP provided, can't do HTTP probe
            return True
        # Check common non-standard web ports
        web_ports = [80, 443, 8000, 8006, 8008, 8080, 8081, 8443, 8888, 9443]
        port = service.get('port')
        if not ip or not port:
            # Check just the service name if there is no IP - honestly shouldn't get here, but just in case...
            if service_name in web_services:
                return True
            return False
-        return port in web_ports
+        # Actually try to connect - this is the definitive test
        # Try HTTPS first, then HTTP
        for protocol in ['https', 'http']:
            url = f"{protocol}://{ip}:{port}/"
            try:
                response = requests.get(
                    url,
                    timeout=3,
                    verify=False,
                    allow_redirects=False
                )
                # Any status code means it's a web server
                # (including 404, 500, etc. - still a web server)
                return True
            except requests.exceptions.SSLError:
                # SSL error on HTTPS, try HTTP next
                continue
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout,
                    requests.exceptions.RequestException):
                continue
        return False
    def _detect_http_https(self, ip: str, port: int, timeout: int = 5) -> str:
        """
@@ -886,7 +914,7 @@ class SneakyScanner:
            ip_results = {}
            for service in services:
-                if not self._is_likely_web_service(service):
+                if not self._is_likely_web_service(service, ip):
                    continue
                port = service['port']
--- a/app/web/api/scans.py
+++ b/app/web/api/scans.py
@@ -7,6 +7,9 @@ scan results.
 import json
 import logging
 from datetime import datetime
 from pathlib import Path
 from flask import Blueprint, current_app, jsonify, request
 from sqlalchemy.exc import SQLAlchemyError
@@ -20,6 +23,89 @@ bp = Blueprint('scans', __name__)
 logger = logging.getLogger(__name__)
 def _recover_orphaned_scan(scan: Scan, session) -> dict:
    """
    Recover an orphaned scan by checking for output files.

    The JSON report decides the outcome: if one is found the scan is marked
    'completed' (smart recovery), otherwise it is marked 'cancelled'. HTML and
    ZIP files are only reported alongside the JSON report; on their own they
    do not make the scan count as completed.

    Args:
        scan: The orphaned Scan object (status, timestamps, error message and
              possibly the output paths are updated in place)
        session: Database session; committed before returning

    Returns:
        Dictionary with recovery result for API response
        (keys: scan_id, status, message, recovery_type)
    """
    output_files_found = []

    def _record(kind: str) -> None:
        # A file type can be seen by both the stored-path check and the
        # directory fallback; list it only once in the message and log.
        if kind not in output_files_found:
            output_files_found.append(kind)

    # Check paths stored in database
    output_exists = bool(scan.json_path and Path(scan.json_path).exists())
    if output_exists:
        _record('json')
    if scan.html_path and Path(scan.html_path).exists():
        _record('html')
    if scan.zip_path and Path(scan.zip_path).exists():
        _record('zip')

    # Also check by timestamp pattern if paths not stored yet
    if not output_exists and scan.started_at:
        output_dir = Path('/app/output')
        if output_dir.exists():
            # NOTE(review): the pattern only contains the start *date*, so a
            # report written by a different scan on the same day would match
            # too, and the first hit wins - confirm against the report naming
            # scheme before relying on this.
            timestamp_pattern = scan.started_at.strftime('%Y%m%d')
            json_file = next(
                output_dir.glob(f'scan_report_{timestamp_pattern}*.json'),
                None
            )
            if json_file is not None:
                output_exists = True
                _record('json')
                # Update scan record with found paths
                scan.json_path = str(json_file)
                for kind, attr in (('html', 'html_path'), ('zip', 'zip_path')):
                    sibling = json_file.with_suffix(f'.{kind}')
                    if sibling.exists():
                        setattr(scan, attr, str(sibling))
                        _record(kind)

    # Use one timestamp so completed_at and duration describe the same instant.
    finished_at = datetime.utcnow()
    scan.completed_at = finished_at
    if scan.started_at:
        scan.duration = (finished_at - scan.started_at).total_seconds()

    if output_exists:
        # Smart recovery: outputs exist, mark as completed
        scan.status = 'completed'
        scan.error_message = None
    else:
        # No outputs: mark as cancelled
        scan.status = 'cancelled'
        scan.error_message = 'Scan process was interrupted before completion. No output files were generated.'
    session.commit()

    if output_exists:
        logger.info(f"Scan {scan.id}: Recovered as completed (files: {output_files_found})")
        return {
            'scan_id': scan.id,
            'status': 'completed',
            'message': f'Scan recovered as completed (output files found: {", ".join(output_files_found)})',
            'recovery_type': 'smart_recovery'
        }

    logger.info(f"Scan {scan.id}: Marked as cancelled (orphaned, no output files)")
    return {
        'scan_id': scan.id,
        'status': 'cancelled',
        'message': 'Orphaned scan cancelled (no output files found)',
        'recovery_type': 'orphan_cleanup'
    }
@bp.route('', methods=['GET'])
@api_auth_required
 def list_scans():
@@ -247,18 +333,23 @@ def delete_scan(scan_id):
@api_auth_required
 def stop_running_scan(scan_id):
    """
-    Stop a running scan.
+    Stop a running scan with smart recovery for orphaned scans.
    If the scan is actively running in the registry, sends a cancel signal.
    If the scan shows as running/finalizing but is not in the registry (orphaned),
    performs smart recovery: marks as 'completed' if output files exist,
    otherwise marks as 'cancelled'.
    Args:
        scan_id: Scan ID to stop
    Returns:
-        JSON response with stop status
+        JSON response with stop status or recovery result
    """
    try:
        session = current_app.db_session
-        # Check if scan exists and is running
+        # Check if scan exists
        scan = session.query(Scan).filter_by(id=scan_id).first()
        if not scan:
            logger.warning(f"Scan not found for stop request: {scan_id}")
@@ -267,7 +358,8 @@ def stop_running_scan(scan_id):
                'message': f'Scan with ID {scan_id} not found'
            }), 404
-        if scan.status != 'running':
+        # Allow stopping scans with status 'running' or 'finalizing'
        if scan.status not in ('running', 'finalizing'):
            logger.warning(f"Cannot stop scan {scan_id}: status is '{scan.status}'")
            return jsonify({
                'error': 'Invalid state',
@@ -288,11 +380,11 @@ def stop_running_scan(scan_id):
                'status': 'stopping'
            }), 200
        else:
-            logger.warning(f"Failed to stop scan {scan_id}: not found in running scanners")
+            # Scanner not in registry - this is an orphaned scan
-            return jsonify({
+            # Attempt smart recovery
-                'error': 'Stop failed',
+            logger.warning(f"Scan {scan_id} not in registry, attempting smart recovery")
-                'message': 'Scan not found in running scanners registry'
+            recovery_result = _recover_orphaned_scan(scan, session)
-            }), 404
+            return jsonify(recovery_result), 200
    except SQLAlchemyError as e:
        logger.error(f"Database error stopping scan {scan_id}: {str(e)}")
--- a/app/web/app.py
+++ b/app/web/app.py
@@ -307,9 +307,12 @@ def init_scheduler(app: Flask) -> None:
    with app.app_context():
        # Clean up any orphaned scans from previous crashes/restarts
        scan_service = ScanService(app.db_session)
-        orphaned_count = scan_service.cleanup_orphaned_scans()
+        cleanup_result = scan_service.cleanup_orphaned_scans()
-        if orphaned_count > 0:
+        if cleanup_result['total'] > 0:
-            app.logger.warning(f"Cleaned up {orphaned_count} orphaned scan(s) on startup")
+            app.logger.warning(
                f"Cleaned up {cleanup_result['total']} orphaned scan(s) on startup: "
                f"{cleanup_result['recovered']} recovered, {cleanup_result['failed']} failed"
            )
        # Load all enabled schedules from database
        scheduler.load_schedules_on_startup()
--- a/app/web/jobs/scan_job.py
+++ b/app/web/jobs/scan_job.py
@@ -240,14 +240,47 @@ def execute_scan(scan_id: int, config_id: int, db_url: str = None):
        scan_duration = (end_time - start_time).total_seconds()
        logger.info(f"Scan {scan_id}: Scanner completed in {scan_duration:.2f} seconds")
-        # Generate output files (JSON, HTML, ZIP)
+        # Transition to 'finalizing' status before output generation
-        logger.info(f"Scan {scan_id}: Generating output files...")
+        try:
-        output_paths = scanner.generate_outputs(report, timestamp)
+            scan = session.query(Scan).filter_by(id=scan_id).first()
            if scan:
                scan.status = 'finalizing'
                scan.current_phase = 'generating_outputs'
                session.commit()
                logger.info(f"Scan {scan_id}: Status changed to 'finalizing'")
        except Exception as e:
            logger.error(f"Scan {scan_id}: Failed to update status to finalizing: {e}")
            session.rollback()
-        # Save results to database
+        # Generate output files (JSON, HTML, ZIP) with error handling
-        logger.info(f"Scan {scan_id}: Saving results to database...")
+        output_paths = {}
-        scan_service = ScanService(session)
+        output_generation_failed = False
-        scan_service._save_scan_to_db(report, scan_id, status='completed', output_paths=output_paths)
+        try:
            logger.info(f"Scan {scan_id}: Generating output files...")
            output_paths = scanner.generate_outputs(report, timestamp)
        except Exception as e:
            output_generation_failed = True
            logger.error(f"Scan {scan_id}: Output generation failed: {str(e)}")
            logger.error(f"Scan {scan_id}: Traceback:\n{traceback.format_exc()}")
            # Still mark scan as completed with warning since scan data is valid
            try:
                scan = session.query(Scan).filter_by(id=scan_id).first()
                if scan:
                    scan.status = 'completed'
                    scan.error_message = f"Scan completed but output file generation failed: {str(e)}"
                    scan.completed_at = datetime.utcnow()
                    if scan.started_at:
                        scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
                    session.commit()
                    logger.info(f"Scan {scan_id}: Marked as completed with output generation warning")
            except Exception as db_error:
                logger.error(f"Scan {scan_id}: Failed to update status after output error: {db_error}")
        # Save results to database (only if output generation succeeded)
        if not output_generation_failed:
            logger.info(f"Scan {scan_id}: Saving results to database...")
            scan_service = ScanService(session)
            scan_service._save_scan_to_db(report, scan_id, status='completed', output_paths=output_paths)
        # Evaluate alert rules
        logger.info(f"Scan {scan_id}: Evaluating alert rules...")
--- a/app/web/models.py
+++ b/app/web/models.py
@@ -45,7 +45,7 @@ class Scan(Base):
    id = Column(Integer, primary_key=True, autoincrement=True)
    timestamp = Column(DateTime, nullable=False, index=True, comment="Scan start time (UTC)")
    duration = Column(Float, nullable=True, comment="Total scan duration in seconds")
-    status = Column(String(20), nullable=False, default='running', comment="running, completed, failed")
+    status = Column(String(20), nullable=False, default='running', comment="running, finalizing, completed, failed, cancelled")
    config_id = Column(Integer, ForeignKey('scan_configs.id'), nullable=True, index=True, comment="FK to scan_configs table")
    title = Column(Text, nullable=True, comment="Scan title from config")
    json_path = Column(Text, nullable=True, comment="Path to JSON report")
--- a/app/web/services/scan_service.py
+++ b/app/web/services/scan_service.py
@@ -286,52 +286,96 @@ class ScanService:
        return [self._scan_to_summary_dict(scan) for scan in scans]
-    def cleanup_orphaned_scans(self) -> int:
+    def cleanup_orphaned_scans(self) -> dict:
        """
-        Clean up orphaned scans that are stuck in 'running' status.
+        Clean up orphaned scans with smart recovery.
        For scans stuck in 'running' or 'finalizing' status:
        - If output files exist: mark as 'completed' (smart recovery)
        - If no output files: mark as 'failed'
        This should be called on application startup to handle scans that
        were running when the system crashed or was restarted.
        Scans in 'running' status are marked as 'failed' with an appropriate
        error message indicating they were orphaned.
        Returns:
-            Number of orphaned scans cleaned up
+            Dictionary with cleanup results: {'recovered': N, 'failed': N, 'total': N}
        """
-        # Find all scans with status='running'
+        # Find all scans with status='running' or 'finalizing'
-        orphaned_scans = self.db.query(Scan).filter(Scan.status == 'running').all()
+        orphaned_scans = self.db.query(Scan).filter(
            Scan.status.in_(['running', 'finalizing'])
        ).all()
        if not orphaned_scans:
            logger.info("No orphaned scans found")
-            return 0
+            return {'recovered': 0, 'failed': 0, 'total': 0}
        count = len(orphaned_scans)
-        logger.warning(f"Found {count} orphaned scan(s) in 'running' status, marking as failed")
+        logger.warning(f"Found {count} orphaned scan(s), attempting smart recovery")
        recovered_count = 0
        failed_count = 0
        output_dir = Path('/app/output')
        # Mark each orphaned scan as failed
        for scan in orphaned_scans:
-            scan.status = 'failed'
+            # Check for existing output files
            output_exists = False
            output_files_found = []
            # Check paths stored in database
            if scan.json_path and Path(scan.json_path).exists():
                output_exists = True
                output_files_found.append('json')
            if scan.html_path and Path(scan.html_path).exists():
                output_files_found.append('html')
            if scan.zip_path and Path(scan.zip_path).exists():
                output_files_found.append('zip')
            # Also check by timestamp pattern if paths not stored yet
            if not output_exists and scan.started_at and output_dir.exists():
                timestamp_pattern = scan.started_at.strftime('%Y%m%d')
                for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
                    output_exists = True
                    output_files_found.append('json')
                    # Update scan record with found paths
                    scan.json_path = str(json_file)
                    html_file = json_file.with_suffix('.html')
                    if html_file.exists():
                        scan.html_path = str(html_file)
                        output_files_found.append('html')
                    zip_file = json_file.with_suffix('.zip')
                    if zip_file.exists():
                        scan.zip_path = str(zip_file)
                        output_files_found.append('zip')
                    break
            if output_exists:
                # Smart recovery: outputs exist, mark as completed
                scan.status = 'completed'
                scan.error_message = f'Recovered from orphaned state (output files found: {", ".join(output_files_found)})'
                recovered_count += 1
                logger.info(f"Recovered orphaned scan {scan.id} as completed (files: {output_files_found})")
            else:
                # No outputs: mark as failed
                scan.status = 'failed'
                scan.error_message = (
                    "Scan was interrupted by system shutdown or crash. "
                    "No output files were generated."
                )
                failed_count += 1
                logger.info(f"Marked orphaned scan {scan.id} as failed (no output files)")
            scan.completed_at = datetime.utcnow()
            scan.error_message = (
                "Scan was interrupted by system shutdown or crash. "
                "The scan was running but did not complete normally."
            )
            # Calculate duration if we have a started_at time
            if scan.started_at:
-                duration = (datetime.utcnow() - scan.started_at).total_seconds()
+                scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
                scan.duration = duration
            logger.info(
                f"Marked orphaned scan {scan.id} as failed "
                f"(started: {scan.started_at.isoformat() if scan.started_at else 'unknown'})"
            )
        self.db.commit()
-        logger.info(f"Cleaned up {count} orphaned scan(s)")
+        logger.info(f"Cleaned up {count} orphaned scan(s): {recovered_count} recovered, {failed_count} failed")
-        return count
+        return {
            'recovered': recovered_count,
            'failed': failed_count,
            'total': count
        }
    def _save_scan_to_db(self, report: Dict[str, Any], scan_id: int,
                        status: str = 'completed', output_paths: Dict = None) -> None:
--- a/app/web/services/schedule_service.py
+++ b/app/web/services/schedule_service.py
@@ -6,7 +6,7 @@ scheduled scans with cron expressions.
 """
 import logging
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple
 from croniter import croniter
@@ -71,6 +71,7 @@ class ScheduleService:
        next_run = self.calculate_next_run(cron_expression) if enabled else None
        # Create schedule record
        now_utc = datetime.now(timezone.utc)
        schedule = Schedule(
            name=name,
            config_id=config_id,
@@ -78,8 +79,8 @@ class ScheduleService:
            enabled=enabled,
            last_run=None,
            next_run=next_run,
-            created_at=datetime.utcnow(),
+            created_at=now_utc,
-            updated_at=datetime.utcnow()
+            updated_at=now_utc
        )
        self.db.add(schedule)
@@ -103,7 +104,14 @@ class ScheduleService:
        Raises:
            ValueError: If schedule not found
        """
-        schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
+        from sqlalchemy.orm import joinedload
        schedule = (
            self.db.query(Schedule)
            .options(joinedload(Schedule.config))
            .filter(Schedule.id == schedule_id)
            .first()
        )
        if not schedule:
            raise ValueError(f"Schedule {schedule_id} not found")
@@ -138,8 +146,10 @@ class ScheduleService:
                'pages': int
            }
        """
-        # Build query
+        from sqlalchemy.orm import joinedload
-        query = self.db.query(Schedule)
+
        # Build query and eagerly load config relationship
        query = self.db.query(Schedule).options(joinedload(Schedule.config))
        # Apply filter
        if enabled_filter is not None:
@@ -215,7 +225,7 @@ class ScheduleService:
            if hasattr(schedule, key):
                setattr(schedule, key, value)
-        schedule.updated_at = datetime.utcnow()
+        schedule.updated_at = datetime.now(timezone.utc)
        self.db.commit()
        self.db.refresh(schedule)
@@ -298,7 +308,7 @@ class ScheduleService:
        schedule.last_run = last_run
        schedule.next_run = next_run
-        schedule.updated_at = datetime.utcnow()
+        schedule.updated_at = datetime.now(timezone.utc)
        self.db.commit()
@@ -311,23 +321,43 @@ class ScheduleService:
        Validate a cron expression.
        Args:
-            cron_expr: Cron expression to validate
+            cron_expr: Cron expression to validate in standard crontab format
                      Format: minute hour day month day_of_week
                      Day of week: 0=Sunday, 1=Monday, ..., 6=Saturday
                      (APScheduler will convert this to its internal format automatically)
        Returns:
            Tuple of (is_valid, error_message)
            - (True, None) if valid
            - (False, error_message) if invalid
        Note:
            This validates using croniter which uses standard crontab format.
            APScheduler's from_crontab() will handle the conversion when the
            schedule is registered with the scheduler.
        """
        try:
            # Try to create a croniter instance
-            base_time = datetime.utcnow()
+            # croniter uses standard crontab format (Sunday=0)
            from datetime import timezone
            base_time = datetime.now(timezone.utc)
            cron = croniter(cron_expr, base_time)
            # Try to get the next run time (validates the expression)
            cron.get_next(datetime)
            # Validate basic format (5 fields)
            fields = cron_expr.split()
            if len(fields) != 5:
                return (False, f"Cron expression must have 5 fields (minute hour day month day_of_week), got {len(fields)}")
            return (True, None)
        except (ValueError, KeyError) as e:
            error_msg = str(e)
            # Add helpful hint for day_of_week errors
            if "day" in error_msg.lower() and len(cron_expr.split()) >= 5:
                hint = "\nNote: Use standard crontab format where 0=Sunday, 1=Monday, ..., 6=Saturday"
                return (False, f"{error_msg}{hint}")
            return (False, str(e))
        except Exception as e:
            return (False, f"Unexpected error: {str(e)}")
@@ -345,17 +375,24 @@ class ScheduleService:
            from_time: Base time (defaults to now UTC)
        Returns:
-            Next run datetime (UTC)
+            Next run datetime (UTC, timezone-aware)
        Raises:
            ValueError: If cron expression is invalid
        """
        if from_time is None:
-            from_time = datetime.utcnow()
+            from_time = datetime.now(timezone.utc)
        try:
            cron = croniter(cron_expr, from_time)
-            return cron.get_next(datetime)
+            next_run = cron.get_next(datetime)
            # croniter returns naive datetime, so we need to add timezone info
            # Since we're using UTC for all calculations, add UTC timezone
            if next_run.tzinfo is None:
                next_run = next_run.replace(tzinfo=timezone.utc)
            return next_run
        except Exception as e:
            raise ValueError(f"Invalid cron expression '{cron_expr}': {str(e)}")
@@ -403,10 +440,16 @@ class ScheduleService:
        Returns:
            Dictionary representation
        """
        # Get config title if relationship is loaded
        config_name = None
        if schedule.config:
            config_name = schedule.config.title
        return {
            'id': schedule.id,
            'name': schedule.name,
            'config_id': schedule.config_id,
            'config_name': config_name,
            'cron_expression': schedule.cron_expression,
            'enabled': schedule.enabled,
            'last_run': schedule.last_run.isoformat() if schedule.last_run else None,
@@ -421,7 +464,7 @@ class ScheduleService:
        Format datetime as relative time.
        Args:
-            dt: Datetime to format (UTC)
+            dt: Datetime to format (UTC, can be naive or aware)
        Returns:
            Human-readable relative time (e.g., "in 2 hours", "yesterday")
@@ -429,7 +472,13 @@ class ScheduleService:
        if dt is None:
            return None
-        now = datetime.utcnow()
+        # Ensure both datetimes are timezone-aware for comparison
        now = datetime.now(timezone.utc)
        # If dt is naive, assume it's UTC and add timezone info
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        diff = dt - now
        # Future times
--- a/app/web/services/scheduler_service.py
+++ b/app/web/services/scheduler_service.py
@@ -149,6 +149,51 @@ class SchedulerService:
        except Exception as e:
            logger.error(f"Error loading schedules on startup: {str(e)}", exc_info=True)
    @staticmethod
    def validate_cron_expression(cron_expression: str) -> tuple[bool, str]:
        """
        Validate a cron expression and provide helpful feedback.

        The field count is checked first, so a malformed expression always
        gets the same explicit "must have 5 fields" message regardless of how
        the underlying parser words its own error. Only 5-field expressions
        are handed to APScheduler's CronTrigger.from_crontab() for parsing.

        Args:
            cron_expression: Cron expression to validate

        Returns:
            Tuple of (is_valid: bool, message: str)
            - If valid: (True, "Valid cron expression")
            - If invalid: (False, "Error message with details")

        Note:
            Standard crontab format: minute hour day month day_of_week
            Day of week: 0=Sunday, 1=Monday, ..., 6=Saturday (or 7=Sunday)
            NOTE(review): confirm that the installed APScheduler version's
            from_crontab() really uses this day-of-week numbering.
        """
        # Reject non-strings up front instead of failing inside .split().
        if not isinstance(cron_expression, str):
            return False, f"Cron expression must be a string, got {type(cron_expression).__name__}"

        # Validate basic format (5 fields) before attempting to parse
        fields = cron_expression.split()
        if len(fields) != 5:
            return False, f"Cron expression must have 5 fields (minute hour day month day_of_week), got {len(fields)}"

        # Imported lazily so the cheap structural checks above do not depend
        # on APScheduler being importable.
        from apscheduler.triggers.cron import CronTrigger

        try:
            # Parsing is the validation; the resulting trigger is not needed.
            CronTrigger.from_crontab(cron_expression)
        except (ValueError, KeyError) as e:
            error_msg = str(e)
            # A purely numeric day_of_week field is the usual place where the
            # crontab and APScheduler numbering get mixed up, so attach the
            # format reminder in that case.
            if fields[4].isdigit():
                hint = "\nNote: Use standard crontab format where 0=Sunday, 1=Monday, ..., 6=Saturday"
                return False, f"Invalid cron expression: {error_msg}{hint}"
            return False, f"Invalid cron expression: {error_msg}"

        return True, "Valid cron expression"
    def queue_scan(self, scan_id: int, config_id: int) -> str:
        """
        Queue a scan for immediate background execution.
@@ -188,6 +233,10 @@ class SchedulerService:
            schedule_id: Database ID of the schedule
            config_id: Database config ID
            cron_expression: Cron expression (e.g., "0 2 * * *" for 2am daily)
                           IMPORTANT: Use standard crontab format where:
                           - Day of week: 0 = Sunday, 1 = Monday, ..., 6 = Saturday
                           - APScheduler automatically converts to its internal format
                           - from_crontab() handles the conversion properly
        Returns:
            Job ID from APScheduler
@@ -195,18 +244,29 @@ class SchedulerService:
        Raises:
            RuntimeError: If scheduler not initialized
            ValueError: If cron expression is invalid
        Note:
            APScheduler internally uses Monday=0, but from_crontab() accepts
            standard crontab format (Sunday=0) and converts it automatically.
        """
        if not self.scheduler:
            raise RuntimeError("Scheduler not initialized. Call init_scheduler() first.")
        from apscheduler.triggers.cron import CronTrigger
        # Validate cron expression first to provide helpful error messages
        is_valid, message = self.validate_cron_expression(cron_expression)
        if not is_valid:
            raise ValueError(message)
        # Create cron trigger from expression using local timezone
-        # This allows users to specify times in their local timezone
+        # from_crontab() parses standard crontab format (Sunday=0)
        # and converts to APScheduler's internal format (Monday=0) automatically
        try:
            trigger = CronTrigger.from_crontab(cron_expression)
            # timezone defaults to local system timezone
        except (ValueError, KeyError) as e:
            # This should not happen due to validation above, but catch anyway
            raise ValueError(f"Invalid cron expression '{cron_expression}': {str(e)}")
        # Add cron job
@@ -294,11 +354,16 @@ class SchedulerService:
                # Update schedule's last_run and next_run
                from croniter import croniter
-                next_run = croniter(schedule['cron_expression'], datetime.utcnow()).get_next(datetime)
+                now_utc = datetime.now(timezone.utc)
                next_run = croniter(schedule['cron_expression'], now_utc).get_next(datetime)
                # croniter returns naive datetime, add UTC timezone
                if next_run.tzinfo is None:
                    next_run = next_run.replace(tzinfo=timezone.utc)
                schedule_service.update_run_times(
                    schedule_id=schedule_id,
-                    last_run=datetime.utcnow(),
+                    last_run=now_utc,
                    next_run=next_run
                )
--- a/app/web/templates/schedule_edit.html
+++ b/app/web/templates/schedule_edit.html
@@ -298,7 +298,11 @@ async function loadSchedule() {
 function populateForm(schedule) {
    document.getElementById('schedule-id').value = schedule.id;
    document.getElementById('schedule-name').value = schedule.name;
-    document.getElementById('config-id').value = schedule.config_id;
+    // Display config name and ID in the readonly config-file field
    const configDisplay = schedule.config_name
        ? `${schedule.config_name} (ID: ${schedule.config_id})`
        : `Config ID: ${schedule.config_id}`;
    document.getElementById('config-file').value = configDisplay;
    document.getElementById('cron-expression').value = schedule.cron_expression;
    document.getElementById('schedule-enabled').checked = schedule.enabled;
--- a/app/web/utils/validators.py
+++ b/app/web/utils/validators.py
@@ -23,7 +23,7 @@ def validate_scan_status(status: str) -> tuple[bool, Optional[str]]:
        >>> validate_scan_status('invalid')
        (False, 'Invalid status: invalid. Must be one of: running, finalizing, completed, failed, cancelled')
    """
-    valid_statuses = ['running', 'completed', 'failed', 'cancelled']
+    valid_statuses = ['running', 'finalizing', 'completed', 'failed', 'cancelled']
    if status not in valid_statuses:
        return False, f'Invalid status: {status}. Must be one of: {", ".join(valid_statuses)}'
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -24,10 +24,10 @@ SneakyScanner is deployed as a Docker container running a Flask web application
 **Architecture:**
 - **Web Application**: Flask app on port 5000 with modern web UI
- **Database**: SQLite (persisted to volume)
+- **Database**: SQLite (persisted to volume) - stores all configurations, scan results, and settings
 - **Background Jobs**: APScheduler for async scan execution
 - **Scanner**: masscan, nmap, sslyze, Playwright
- **Config Creator**: Web-based CIDR-to-YAML configuration builder
+- **Config Management**: Database-backed configuration system managed entirely via web UI
 - **Scheduling**: Cron-based scheduled scans with dashboard management
 ---
@@ -143,6 +143,13 @@ docker compose -f docker-compose-standalone.yml up
 SneakyScanner is configured via environment variables. The recommended approach is to use a `.env` file.
 **UDP Port Scanning**
 - UDP port scanning is disabled by default.
 - You can turn it on via the corresponding variable in your `.env` file.
 - By default, UDP port scanning covers only the top 20 ports; for convenience, I have also included the Nmap top 100 UDP ports.
 #### Creating Your .env File
 ```bash
@@ -160,6 +167,7 @@ python3 -c "from cryptography.fernet import Fernet; print('SNEAKYSCANNER_ENCRYPT
 nano .env
 ```
 #### Key Configuration Options
 | Variable | Description | Default | Required |
@@ -190,54 +198,30 @@ The application needs these directories (created automatically by Docker):
 ```bash
 # Verify directories exist
-ls -la configs/ data/ output/ logs/
+ls -la data/ output/ logs/
 # If missing, create them
-mkdir -p configs data output logs
+mkdir -p data output logs
 ```
 ### Step 2: Configure Scan Targets
-You can create scan configurations in two ways:
+After starting the application, create scan configurations using the web UI:
-**Option A: Using the Web UI (Recommended - Phase 4 Feature)**
+**Creating Configurations via Web UI**
 1. Navigate to **Configs** in the web interface
 2. Click **"Create New Config"**
-3. Use the CIDR-based config creator for quick setup:
+3. Use the form-based config creator:
   - Enter site name
   - Enter CIDR range (e.g., `192.168.1.0/24`)
-   - Select expected ports from dropdowns
+   - Select expected TCP/UDP ports from dropdowns
-   - Click **"Generate Config"**
+   - Optionally enable ping checks
-4. Or use the **YAML Editor** for advanced configurations
+4. Click **"Save Configuration"**
-5. Save and use immediately in scans or schedules
+5. The configuration is saved to the database and is immediately available for scans and schedules
-**Option B: Manual YAML Files**
+**Note**: All configurations are stored in the database, not as files. This provides better reliability, easier backup, and seamless management through the web interface.
 Create YAML configuration files manually in the `configs/` directory:
 ```bash
 # Example configuration
 cat > configs/my-network.yaml <<EOF
 title: "My Network Infrastructure"
 sites:
  - name: "Web Servers"
    cidr: "192.168.1.0/24"  # Scan entire subnet
    expected_ports:
      - port: 80
        protocol: tcp
        service: "http"
      - port: 443
        protocol: tcp
        service: "https"
      - port: 22
        protocol: tcp
        service: "ssh"
    ping_expected: true
 EOF
 ```
 **Note**: Phase 4 introduced a powerful config creator in the web UI that makes it easy to generate configs from CIDR ranges without manually editing YAML.
 ### Step 3: Build Docker Image
@@ -389,38 +373,37 @@ The dashboard provides a central view of your scanning activity:
 - **Trend Charts**: Port count trends over time using Chart.js
 - **Quick Actions**: Buttons to run scans, create configs, manage schedules
-### Managing Scan Configurations (Phase 4)
+### Managing Scan Configurations
 All scan configurations are stored in the database and managed entirely through the web interface.
 **Creating Configs:**
 1. Navigate to **Configs** → **Create New Config**
-2. **CIDR Creator Mode**:
+2. Fill in the configuration form:
   - Enter site name (e.g., "Production Servers")
   - Enter CIDR range (e.g., `192.168.1.0/24`)
   - Select expected TCP/UDP ports from dropdowns
-   - Click **"Generate Config"** to create YAML
+   - Enable/disable ping checks
-3. **YAML Editor Mode**:
+3. Click **"Save Configuration"**
-   - Switch to editor tab for advanced configurations
+4. The configuration is immediately stored in the database and available for use
   - Syntax highlighting with line numbers
   - Validate YAML before saving
 **Editing Configs:**
-1. Navigate to **Configs** → Select config
+1. Navigate to **Configs** → Select config from list
 2. Click **"Edit"** button
-3. Make changes in YAML editor
+3. Modify any fields in the configuration form
-4. Save changes (validates YAML automatically)
+4. Click **"Save Changes"** to update database
-**Uploading Configs:**
+**Viewing Configs:**
-1. Navigate to **Configs** → **Upload**
+- Navigate to **Configs** page to see all saved configurations
-2. Select YAML file from your computer
+- Each config shows site name, CIDR range, and expected ports
-3. File is validated and saved to `configs/` directory
+- Click on any config to view full details
 **Downloading Configs:**
 - Click **"Download"** button next to any config
 - Saves YAML file to your local machine
 **Deleting Configs:**
- Click **"Delete"** button
+- Click **"Delete"** button next to any config
 - **Warning**: Cannot delete configs used by active schedules
 - Deletion removes the configuration from the database permanently
 **Note**: All configurations are database-backed, providing automatic backups when you backup the database file.
 ### Running Scans
@@ -477,12 +460,11 @@ SneakyScanner uses several mounted volumes for data persistence:
 | Volume | Container Path | Purpose | Important? |
 |--------|----------------|---------|------------|
-| `./configs` | `/app/configs` | Scan configuration files (managed via web UI) | Yes |
+| `./data` | `/app/data` | SQLite database (contains configurations, scan history, settings) | **Critical** |
 | `./data` | `/app/data` | SQLite database (contains all scan history) | **Critical** |
 | `./output` | `/app/output` | Scan results (JSON, HTML, ZIP, screenshots) | Yes |
 | `./logs` | `/app/logs` | Application logs (rotating file handler) | No |
-**Note**: As of Phase 4, the `./configs` volume is read-write to support the web-based config creator and editor. The web UI can now create, edit, and delete configuration files directly.
+**Note**: All scan configurations are stored in the SQLite database (`./data/sneakyscanner.db`). There is no separate configs directory or YAML files. Backing up the database file ensures all your configurations are preserved.
 ### Backing Up Data
@@ -490,23 +472,22 @@ SneakyScanner uses several mounted volumes for data persistence:
 # Create backup directory
 mkdir -p backups/$(date +%Y%m%d)
-# Backup database
+# Backup database (includes all configurations)
 cp data/sneakyscanner.db backups/$(date +%Y%m%d)/
 # Backup scan outputs
 tar -czf backups/$(date +%Y%m%d)/output.tar.gz output/
 # Backup configurations
 tar -czf backups/$(date +%Y%m%d)/configs.tar.gz configs/
 ```
 **Important**: The database backup includes all scan configurations, settings, schedules, and scan history. No separate configuration file backup is needed.
 ### Restoring Data
 ```bash
 # Stop application
 docker compose -f docker-compose.yml down
-# Restore database
+# Restore database (includes all configurations)
 cp backups/YYYYMMDD/sneakyscanner.db data/
 # Restore outputs
@@ -516,6 +497,8 @@ tar -xzf backups/YYYYMMDD/output.tar.gz
 docker compose -f docker-compose.yml up -d
 ```
 **Note**: Restoring the database file restores all configurations, settings, schedules, and scan history.
 ### Cleaning Up Old Scan Results
 **Option A: Using the Web UI (Recommended)**
@@ -564,50 +547,52 @@ curl -X POST http://localhost:5000/api/auth/logout \
  -b cookies.txt
 ```
-### Config Management (Phase 4)
+### Config Management
 ```bash
 # List all configs
 curl http://localhost:5000/api/configs \
  -b cookies.txt
-# Get specific config
+# Get specific config by ID
-curl http://localhost:5000/api/configs/prod-network.yaml \
+curl http://localhost:5000/api/configs/1 \
  -b cookies.txt
 # Create new config
 curl -X POST http://localhost:5000/api/configs \
  -H "Content-Type: application/json" \
  -d '{
-    "filename": "test-network.yaml",
+    "name": "Test Network",
-    "content": "title: Test Network\nsites:\n  - name: Test\n    cidr: 10.0.0.0/24"
+    "cidr": "10.0.0.0/24",
    "expected_ports": [
      {"port": 80, "protocol": "tcp", "service": "http"},
      {"port": 443, "protocol": "tcp", "service": "https"}
    ],
    "ping_expected": true
  }' \
  -b cookies.txt
 # Update config
-curl -X PUT http://localhost:5000/api/configs/test-network.yaml \
+curl -X PUT http://localhost:5000/api/configs/1 \
  -H "Content-Type: application/json" \
  -d '{
-    "content": "title: Updated Test Network\nsites:\n  - name: Test Site\n    cidr: 10.0.0.0/24"
+    "name": "Updated Test Network",
    "cidr": "10.0.1.0/24"
  }' \
  -b cookies.txt
 # Download config
 curl http://localhost:5000/api/configs/test-network.yaml/download \
  -b cookies.txt -o test-network.yaml
 # Delete config
-curl -X DELETE http://localhost:5000/api/configs/test-network.yaml \
+curl -X DELETE http://localhost:5000/api/configs/1 \
  -b cookies.txt
 ```
 ### Scan Management
 ```bash
-# Trigger a scan
+# Trigger a scan (using config ID from database)
 curl -X POST http://localhost:5000/api/scans \
  -H "Content-Type: application/json" \
-  -d '{"config_id": "/app/configs/prod-network.yaml"}' \
+  -d '{"config_id": 1}' \
  -b cookies.txt
 # List all scans
@@ -634,12 +619,12 @@ curl -X DELETE http://localhost:5000/api/scans/123 \
 curl http://localhost:5000/api/schedules \
  -b cookies.txt
-# Create schedule
+# Create schedule (using config ID from database)
 curl -X POST http://localhost:5000/api/schedules \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Daily Production Scan",
-    "config_id": "/app/configs/prod-network.yaml",
+    "config_id": 1,
    "cron_expression": "0 2 * * *",
    "enabled": true
  }' \
@@ -875,24 +860,25 @@ docker compose -f docker-compose.yml logs web | grep -E "(ERROR|Exception|Traceb
 docker compose -f docker-compose.yml exec web which masscan nmap
 ```
-### Config Files Not Appearing in Web UI
+### Configs Not Appearing in Web UI
-**Problem**: Manually created configs don't show up in web interface
+**Problem**: Created configs don't show up in web interface
 ```bash
-# Check file permissions (must be readable by web container)
+# Check database connectivity
-ls -la configs/
+docker compose -f docker-compose.yml logs web | grep -i "database"
-# Fix permissions if needed
+# Verify database file exists and is readable
-sudo chown -R 1000:1000 configs/
+ls -lh data/sneakyscanner.db
 chmod 644 configs/*.yaml
-# Verify YAML syntax is valid
+# Check for errors when creating configs
 docker compose -f docker-compose.yml exec web python3 -c \
  "import yaml; yaml.safe_load(open('/app/configs/your-config.yaml'))"
 # Check web logs for parsing errors
 docker compose -f docker-compose.yml logs web | grep -i "config"
 # Try accessing configs via API
 curl http://localhost:5000/api/configs -b cookies.txt
 # If database is corrupted, check integrity
 docker compose -f docker-compose.yml exec web sqlite3 /app/data/sneakyscanner.db "PRAGMA integrity_check;"
 ```
 ### Health Check Failing
@@ -979,11 +965,11 @@ server {
 # Ensure proper ownership of data directories
 sudo chown -R $USER:$USER data/ output/ logs/
-# Restrict database file permissions
+# Restrict database file permissions (contains configurations and sensitive data)
 chmod 600 data/sneakyscanner.db
-# Configs should be read-only
+# Ensure database directory is writable
-chmod 444 configs/*.yaml
+chmod 700 data/
 ```
 ---
@@ -1051,19 +1037,17 @@ mkdir -p "$BACKUP_DIR"
 # Stop application for consistent backup
 docker compose -f docker-compose.yml stop web
-# Backup database
+# Backup database (includes all configurations)
 cp data/sneakyscanner.db "$BACKUP_DIR/"
 # Backup outputs (last 30 days only)
 find output/ -type f -mtime -30 -exec cp --parents {} "$BACKUP_DIR/" \;
 # Backup configs
 cp -r configs/ "$BACKUP_DIR/"
 # Restart application
 docker compose -f docker-compose.yml start web
 echo "Backup complete: $BACKUP_DIR"
 echo "Database backup includes all configurations, settings, and scan history"
 ```
 Make executable and schedule with cron:
@@ -1083,15 +1067,18 @@ crontab -e
 # Stop application
 docker compose -f docker-compose.yml down
-# Restore files
+# Restore database (includes all configurations)
 cp backups/YYYYMMDD_HHMMSS/sneakyscanner.db data/
-cp -r backups/YYYYMMDD_HHMMSS/configs/* configs/
+
 # Restore output files
 cp -r backups/YYYYMMDD_HHMMSS/output/* output/
 # Start application
 docker compose -f docker-compose.yml up -d
 ```
 **Note**: Restoring the database file will restore all configurations, settings, schedules, and scan history from the backup.
 ---
 ## Support and Further Reading
@@ -1105,13 +1092,13 @@ docker compose -f docker-compose.yml up -d
 ## What's New
-### Phase 4 (2025-11-17) - Config Creator ✅
+### Phase 4+ (2025-11-17) - Database-Backed Configuration System ✅
- **CIDR-based Config Creator**: Web UI for generating scan configs from CIDR ranges
+- **Database-Backed Configs**: All configurations stored in SQLite database (no YAML files)
- **YAML Editor**: Built-in editor with syntax highlighting (CodeMirror)
+- **Web-Based Config Creator**: Form-based UI for creating scan configs from CIDR ranges
- **Config Management UI**: List, view, edit, download, and delete configs via web interface
+- **Config Management UI**: List, view, edit, and delete configs via web interface
- **Config Upload**: Direct YAML file upload for advanced users
+- **REST API**: Full config management via RESTful API with database storage
 - **REST API**: 7 new config management endpoints
 - **Schedule Protection**: Prevents deleting configs used by active schedules
 - **Automatic Backups**: Configurations included in database backups
 ### Phase 3 (2025-11-14) - Dashboard & Scheduling ✅
 - **Dashboard**: Summary stats, recent scans, trend charts
@@ -1133,5 +1120,5 @@ docker compose -f docker-compose.yml up -d
 ---
-**Last Updated**: 2025-11-17
+**Last Updated**: 2025-11-24
-**Version**: Phase 4 - Config Creator Complete
+**Version**: Phase 4+ - Database-Backed Configuration System
--- a/docs/KNOWN_ISSUES.md
+++ b/docs/KNOWN_ISSUES.md
Author	SHA1	Message	Date
Phillip Tarrant	4b197e0b3d	Merge pull request 'beta' (#10 ) from beta into master Reviewed-on: #10	2025-11-25 20:49:46 +00:00
Phillip Tarrant	30f0987a99	Merge pull request 'nightly' (#9 ) from nightly into beta Reviewed-on: #9	2025-11-25 20:49:25 +00:00
Phillip Tarrant	9e2fc348b7	Merge branch 'bug/long-scans-break' into nightly	2025-11-25 14:48:00 -06:00
Phillip Tarrant	847e05abbe	Changes Made 1. app/web/utils/validators.py - Added 'finalizing' to valid_statuses list 2. app/web/models.py - Updated status field comment to document all valid statuses 3. app/web/jobs/scan_job.py - Added transition to 'finalizing' status before output file generation - Sets current_phase = 'generating_outputs' during this phase - Wrapped output generation in try-except with proper error handling - If output generation fails, scan is marked 'completed' with warning message (scan data is still valid) 4. app/web/api/scans.py - Added _recover_orphaned_scan() helper function for smart recovery - Modified stop_running_scan() to: - Allow stopping scans with status 'running' OR 'finalizing' - When scanner not in registry, perform smart recovery instead of returning 404 - Smart recovery checks for output files and marks as 'completed' if found, 'cancelled' if not 5. app/web/services/scan_service.py - Enhanced cleanup_orphaned_scans() with smart recovery logic - Now finds scans in both 'running' and 'finalizing' status - Returns dict with stats: {'recovered': N, 'failed': N, 'total': N} 6. app/web/app.py - Updated caller to handle new dict return type from cleanup_orphaned_scans() Expected Behavior Now 1. Normal scan flow: running → finalizing → completed 2. Stop on active scan: Sends cancel signal, becomes 'cancelled' 3. Stop on orphaned scan with files: Smart recovery → 'completed' 4. Stop on orphaned scan without files: → 'cancelled' 5. App restart with orphans: Startup cleanup uses smart recovery	2025-11-25 14:47:36 -06:00
Phillip Tarrant	07c2bcfd11	Merge branch 'beta'	2025-11-24 12:54:58 -06:00
Phillip Tarrant	a560bae800	Merge branch 'nightly' into beta	2025-11-24 12:54:33 -06:00
Phillip Tarrant	56828e4184	Merge branch 'feat/fix-cron-schedules' into nightly	2025-11-24 12:53:44 -06:00
Phillip Tarrant	5e3a70f837	Fix schedule management and update documentation for database-backed configs This commit addresses multiple issues with schedule management and updates documentation to reflect the transition from YAML-based to database-backed configuration system. Documentation Updates: - Update DEPLOYMENT.md to remove all references to YAML config files - Document that all configurations are now stored in SQLite database - Update API examples to use config IDs instead of YAML filenames - Remove configs directory from backup/restore procedures - Update volume management section to reflect database-only storage Cron Expression Handling: - Add comprehensive documentation for APScheduler cron format conversion - Document that from_crontab() accepts standard format (Sunday=0) and converts automatically - Add validate_cron_expression() helper method with detailed error messages - Include helpful hints for day-of-week field errors in validation - Fix all deprecated datetime.utcnow() calls, replace with datetime.now(timezone.utc) Timezone-Aware DateTime Fixes: - Fix "can't subtract offset-naive and offset-aware datetimes" error - Add timezone awareness to croniter.get_next() return values - Make _get_relative_time() defensive to handle both naive and aware datetimes - Ensure all datetime comparisons use timezone-aware objects Schedule Edit UI Fixes: - Fix JavaScript error "Cannot set properties of null (setting 'value')" - Change reference from non-existent 'config-id' to correct 'config-file' element - Add config_name field to schedule API responses for better UX - Eagerly load Schedule.config relationship using joinedload() - Fix AttributeError: use schedule.config.title instead of .name - Display config title and ID in schedule edit form Technical Details: - app/web/services/schedule_service.py: 6 datetime.utcnow() fixes, validation enhancements - app/web/services/scheduler_service.py: Documentation, validation, timezone fixes - 
app/web/templates/schedule_edit.html: JavaScript element reference fix - docs/DEPLOYMENT.md: Complete rewrite of config management sections Fixes scheduling for Sunday at midnight (cron: 0 0 * * 0) Fixes schedule edit page JavaScript errors Improves user experience with config title display	2025-11-24 12:53:06 -06:00
Phillip Tarrant	451c7e92ff	Merge pull request 'Merging beta into master' (#8 ) from beta into master Reviewed-on: #8	2025-11-21 22:07:06 +00:00
Phillip Tarrant	8b89fd506d	Merge pull request 'nightly merge into beta' (#7 ) from nightly into beta Reviewed-on: #7	2025-11-21 22:05:43 +00:00