From 847e05abbe5131db6eb4f2e573dfc83fd0d51369 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant
Date: Tue, 25 Nov 2025 14:47:36 -0600
Subject: [PATCH] Add 'finalizing' scan status and smart recovery for orphaned
 scans
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changes made:

1. app/web/utils/validators.py
   - Added 'finalizing' to the valid_statuses list

2. app/web/models.py
   - Updated the status field comment to document all valid statuses

3. app/web/jobs/scan_job.py
   - Added a transition to 'finalizing' status before output file generation
   - Sets current_phase = 'generating_outputs' during this phase
   - Wrapped output generation in try-except with proper error handling
   - If output generation fails, the scan is marked 'completed' with a
     warning message (the scan data is still valid)

4. app/web/api/scans.py
   - Added a _recover_orphaned_scan() helper function for smart recovery
   - Modified stop_running_scan() to:
     - Allow stopping scans with status 'running' OR 'finalizing'
     - Perform smart recovery instead of returning 404 when the scanner is
       not in the registry
     - Check for output files during smart recovery and mark the scan
       'completed' if they are found, 'cancelled' if not

5. app/web/services/scan_service.py
   - Enhanced cleanup_orphaned_scans() with smart recovery logic
   - Now finds scans in both 'running' and 'finalizing' status
   - Returns a dict with stats: {'recovered': N, 'failed': N, 'total': N}

6. app/web/app.py
   - Updated the caller to handle the new dict return type from
     cleanup_orphaned_scans()

Expected behavior now:

1. Normal scan flow: running → finalizing → completed
2. Stop on an active scan: sends a cancel signal, scan becomes 'cancelled'
3. Stop on an orphaned scan with output files: smart recovery → 'completed'
4. Stop on an orphaned scan without output files: → 'cancelled'
5. App restart with orphans: startup cleanup uses smart recovery
---
 app/web/api/scans.py             | 110 ++++++++++++++++++++++++++++---
 app/web/app.py                   |   9 ++-
 app/web/jobs/scan_job.py         |  47 +++++++++++--
 app/web/models.py                |   2 +-
 app/web/services/scan_service.py |  98 +++++++++++++++++++--------
 app/web/utils/validators.py      |   2 +-
 6 files changed, 220 insertions(+), 48 deletions(-)

diff --git a/app/web/api/scans.py b/app/web/api/scans.py
index 1f6a314..802b93d 100644
--- a/app/web/api/scans.py
+++ b/app/web/api/scans.py
@@ -7,6 +7,9 @@ scan results.
 import json
 import logging
 
+from datetime import datetime
+from pathlib import Path
+
 from flask import Blueprint, current_app, jsonify, request
 from sqlalchemy.exc import SQLAlchemyError
 
@@ -20,6 +23,89 @@ bp = Blueprint('scans', __name__)
 logger = logging.getLogger(__name__)
 
 
+def _recover_orphaned_scan(scan: Scan, session) -> dict:
+    """
+    Recover an orphaned scan by checking for output files.
+
+    If output files exist: mark as 'completed' (smart recovery)
+    If no output files: mark as 'cancelled'
+
+    Args:
+        scan: The orphaned Scan object
+        session: Database session
+
+    Returns:
+        Dictionary with recovery result for API response
+    """
+    # Check for existing output files
+    output_exists = False
+    output_files_found = []
+
+    # Check paths stored in database
+    if scan.json_path and Path(scan.json_path).exists():
+        output_exists = True
+        output_files_found.append('json')
+    if scan.html_path and Path(scan.html_path).exists():
+        output_files_found.append('html')
+    if scan.zip_path and Path(scan.zip_path).exists():
+        output_files_found.append('zip')
+
+    # Also check by timestamp pattern if paths not stored yet
+    if not output_exists and scan.started_at:
+        output_dir = Path('/app/output')
+        if output_dir.exists():
+            timestamp_pattern = scan.started_at.strftime('%Y%m%d')
+            for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
+                output_exists = True
+                output_files_found.append('json')
+                # Update scan record with found paths
+                scan.json_path = str(json_file)
+                html_file = json_file.with_suffix('.html')
+                if html_file.exists():
+                    scan.html_path = str(html_file)
+                    output_files_found.append('html')
+                zip_file = json_file.with_suffix('.zip')
+                if zip_file.exists():
+                    scan.zip_path = str(zip_file)
+                    output_files_found.append('zip')
+                break
+
+    if output_exists:
+        # Smart recovery: outputs exist, mark as completed
+        scan.status = 'completed'
+        scan.completed_at = datetime.utcnow()
+        if scan.started_at:
+            scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
+        scan.error_message = None
+        session.commit()
+
+        logger.info(f"Scan {scan.id}: Recovered as completed (files: {output_files_found})")
+
+        return {
+            'scan_id': scan.id,
+            'status': 'completed',
+            'message': f'Scan recovered as completed (output files found: {", ".join(output_files_found)})',
+            'recovery_type': 'smart_recovery'
+        }
+    else:
+        # No outputs: mark as cancelled
+        scan.status = 'cancelled'
+        scan.completed_at = datetime.utcnow()
+        if scan.started_at:
+            scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
+        scan.error_message = 'Scan process was interrupted before completion. No output files were generated.'
+        session.commit()
+
+        logger.info(f"Scan {scan.id}: Marked as cancelled (orphaned, no output files)")
+
+        return {
+            'scan_id': scan.id,
+            'status': 'cancelled',
+            'message': 'Orphaned scan cancelled (no output files found)',
+            'recovery_type': 'orphan_cleanup'
+        }
+
+
 @bp.route('', methods=['GET'])
 @api_auth_required
 def list_scans():
@@ -247,18 +333,23 @@ def delete_scan(scan_id):
 @api_auth_required
 def stop_running_scan(scan_id):
     """
-    Stop a running scan.
+    Stop a running scan with smart recovery for orphaned scans.
+
+    If the scan is actively running in the registry, sends a cancel signal.
+    If the scan shows as running/finalizing but is not in the registry (orphaned),
+    performs smart recovery: marks as 'completed' if output files exist,
+    otherwise marks as 'cancelled'.
 
     Args:
        scan_id: Scan ID to stop
 
     Returns:
-        JSON response with stop status
+        JSON response with stop status or recovery result
     """
     try:
         session = current_app.db_session
 
-        # Check if scan exists and is running
+        # Check if scan exists
         scan = session.query(Scan).filter_by(id=scan_id).first()
         if not scan:
             logger.warning(f"Scan not found for stop request: {scan_id}")
@@ -267,7 +358,8 @@ def stop_running_scan(scan_id):
                 'message': f'Scan with ID {scan_id} not found'
             }), 404
 
-        if scan.status != 'running':
+        # Allow stopping scans with status 'running' or 'finalizing'
+        if scan.status not in ('running', 'finalizing'):
             logger.warning(f"Cannot stop scan {scan_id}: status is '{scan.status}'")
             return jsonify({
                 'error': 'Invalid state',
@@ -288,11 +380,11 @@ def stop_running_scan(scan_id):
                 'status': 'stopping'
             }), 200
         else:
-            logger.warning(f"Failed to stop scan {scan_id}: not found in running scanners")
-            return jsonify({
-                'error': 'Stop failed',
-                'message': 'Scan not found in running scanners registry'
-            }), 404
+            # Scanner not in registry - this is an orphaned scan
+            # Attempt smart recovery
+            logger.warning(f"Scan {scan_id} not in registry, attempting smart recovery")
+            recovery_result = _recover_orphaned_scan(scan, session)
+            return jsonify(recovery_result), 200
 
     except SQLAlchemyError as e:
         logger.error(f"Database error stopping scan {scan_id}: {str(e)}")
diff --git a/app/web/app.py b/app/web/app.py
index bfce6c5..090a536 100644
--- a/app/web/app.py
+++ b/app/web/app.py
@@ -307,9 +307,12 @@ def init_scheduler(app: Flask) -> None:
     with app.app_context():
         # Clean up any orphaned scans from previous crashes/restarts
         scan_service = ScanService(app.db_session)
-        orphaned_count = scan_service.cleanup_orphaned_scans()
-        if orphaned_count > 0:
-            app.logger.warning(f"Cleaned up {orphaned_count} orphaned scan(s) on startup")
+        cleanup_result = scan_service.cleanup_orphaned_scans()
+        if cleanup_result['total'] > 0:
+            app.logger.warning(
+                f"Cleaned up {cleanup_result['total']} orphaned scan(s) on startup: "
+                f"{cleanup_result['recovered']} recovered, {cleanup_result['failed']} failed"
+            )
 
         # Load all enabled schedules from database
         scheduler.load_schedules_on_startup()
diff --git a/app/web/jobs/scan_job.py b/app/web/jobs/scan_job.py
index 6b48c21..ce867ef 100644
--- a/app/web/jobs/scan_job.py
+++ b/app/web/jobs/scan_job.py
@@ -240,14 +240,47 @@ def execute_scan(scan_id: int, config_id: int, db_url: str = None):
         scan_duration = (end_time - start_time).total_seconds()
         logger.info(f"Scan {scan_id}: Scanner completed in {scan_duration:.2f} seconds")
 
-        # Generate output files (JSON, HTML, ZIP)
-        logger.info(f"Scan {scan_id}: Generating output files...")
-        output_paths = scanner.generate_outputs(report, timestamp)
+        # Transition to 'finalizing' status before output generation
+        try:
+            scan = session.query(Scan).filter_by(id=scan_id).first()
+            if scan:
+                scan.status = 'finalizing'
+                scan.current_phase = 'generating_outputs'
+                session.commit()
+                logger.info(f"Scan {scan_id}: Status changed to 'finalizing'")
+        except Exception as e:
+            logger.error(f"Scan {scan_id}: Failed to update status to finalizing: {e}")
+            session.rollback()
 
-        # Save results to database
-        logger.info(f"Scan {scan_id}: Saving results to database...")
-        scan_service = ScanService(session)
-        scan_service._save_scan_to_db(report, scan_id, status='completed', output_paths=output_paths)
+        # Generate output files (JSON, HTML, ZIP) with error handling
+        output_paths = {}
+        output_generation_failed = False
+        try:
+            logger.info(f"Scan {scan_id}: Generating output files...")
+            output_paths = scanner.generate_outputs(report, timestamp)
+        except Exception as e:
+            output_generation_failed = True
+            logger.error(f"Scan {scan_id}: Output generation failed: {str(e)}")
+            logger.error(f"Scan {scan_id}: Traceback:\n{traceback.format_exc()}")
+            # Still mark scan as completed with warning since scan data is valid
+            try:
+                scan = session.query(Scan).filter_by(id=scan_id).first()
+                if scan:
+                    scan.status = 'completed'
+                    scan.error_message = f"Scan completed but output file generation failed: {str(e)}"
+                    scan.completed_at = datetime.utcnow()
+                    if scan.started_at:
+                        scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
+                    session.commit()
+                    logger.info(f"Scan {scan_id}: Marked as completed with output generation warning")
+            except Exception as db_error:
+                logger.error(f"Scan {scan_id}: Failed to update status after output error: {db_error}")
+
+        # Save results to database (only if output generation succeeded)
+        if not output_generation_failed:
+            logger.info(f"Scan {scan_id}: Saving results to database...")
+            scan_service = ScanService(session)
+            scan_service._save_scan_to_db(report, scan_id, status='completed', output_paths=output_paths)
 
         # Evaluate alert rules
         logger.info(f"Scan {scan_id}: Evaluating alert rules...")
diff --git a/app/web/models.py b/app/web/models.py
index feeb130..3336b03 100644
--- a/app/web/models.py
+++ b/app/web/models.py
@@ -45,7 +45,7 @@ class Scan(Base):
     id = Column(Integer, primary_key=True, autoincrement=True)
     timestamp = Column(DateTime, nullable=False, index=True, comment="Scan start time (UTC)")
     duration = Column(Float, nullable=True, comment="Total scan duration in seconds")
-    status = Column(String(20), nullable=False, default='running', comment="running, completed, failed")
+    status = Column(String(20), nullable=False, default='running', comment="running, finalizing, completed, failed, cancelled")
     config_id = Column(Integer, ForeignKey('scan_configs.id'), nullable=True, index=True, comment="FK to scan_configs table")
     title = Column(Text, nullable=True, comment="Scan title from config")
     json_path = Column(Text, nullable=True, comment="Path to JSON report")
diff --git a/app/web/services/scan_service.py b/app/web/services/scan_service.py
index ba518c6..11939cf 100644
--- a/app/web/services/scan_service.py
+++ b/app/web/services/scan_service.py
@@ -286,52 +286,96 @@ class ScanService:
 
         return [self._scan_to_summary_dict(scan) for scan in scans]
 
-    def cleanup_orphaned_scans(self) -> int:
+    def cleanup_orphaned_scans(self) -> dict:
         """
-        Clean up orphaned scans that are stuck in 'running' status.
+        Clean up orphaned scans with smart recovery.
+
+        For scans stuck in 'running' or 'finalizing' status:
+        - If output files exist: mark as 'completed' (smart recovery)
+        - If no output files: mark as 'failed'
 
         This should be called on application startup to handle scans that
         were running when the system crashed or was restarted.
 
-        Scans in 'running' status are marked as 'failed' with an appropriate
-        error message indicating they were orphaned.
-
         Returns:
-            Number of orphaned scans cleaned up
+            Dictionary with cleanup results: {'recovered': N, 'failed': N, 'total': N}
         """
-        # Find all scans with status='running'
-        orphaned_scans = self.db.query(Scan).filter(Scan.status == 'running').all()
+        # Find all scans with status='running' or 'finalizing'
+        orphaned_scans = self.db.query(Scan).filter(
+            Scan.status.in_(['running', 'finalizing'])
+        ).all()
 
         if not orphaned_scans:
             logger.info("No orphaned scans found")
-            return 0
+            return {'recovered': 0, 'failed': 0, 'total': 0}
 
         count = len(orphaned_scans)
-        logger.warning(f"Found {count} orphaned scan(s) in 'running' status, marking as failed")
+        logger.warning(f"Found {count} orphaned scan(s), attempting smart recovery")
+
+        recovered_count = 0
+        failed_count = 0
+        output_dir = Path('/app/output')
 
-        # Mark each orphaned scan as failed
         for scan in orphaned_scans:
-            scan.status = 'failed'
+            # Check for existing output files
+            output_exists = False
+            output_files_found = []
+
+            # Check paths stored in database
+            if scan.json_path and Path(scan.json_path).exists():
+                output_exists = True
+                output_files_found.append('json')
+            if scan.html_path and Path(scan.html_path).exists():
+                output_files_found.append('html')
+            if scan.zip_path and Path(scan.zip_path).exists():
+                output_files_found.append('zip')
+
+            # Also check by timestamp pattern if paths not stored yet
+            if not output_exists and scan.started_at and output_dir.exists():
+                timestamp_pattern = scan.started_at.strftime('%Y%m%d')
+                for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
+                    output_exists = True
+                    output_files_found.append('json')
+                    # Update scan record with found paths
+                    scan.json_path = str(json_file)
+                    html_file = json_file.with_suffix('.html')
+                    if html_file.exists():
+                        scan.html_path = str(html_file)
+                        output_files_found.append('html')
+                    zip_file = json_file.with_suffix('.zip')
+                    if zip_file.exists():
+                        scan.zip_path = str(zip_file)
+                        output_files_found.append('zip')
+                    break
+
+            if output_exists:
+                # Smart recovery: outputs exist, mark as completed
+                scan.status = 'completed'
+                scan.error_message = f'Recovered from orphaned state (output files found: {", ".join(output_files_found)})'
+                recovered_count += 1
+                logger.info(f"Recovered orphaned scan {scan.id} as completed (files: {output_files_found})")
+            else:
+                # No outputs: mark as failed
+                scan.status = 'failed'
+                scan.error_message = (
+                    "Scan was interrupted by system shutdown or crash. "
+                    "No output files were generated."
+                )
+                failed_count += 1
+                logger.info(f"Marked orphaned scan {scan.id} as failed (no output files)")
+
             scan.completed_at = datetime.utcnow()
-            scan.error_message = (
-                "Scan was interrupted by system shutdown or crash. "
-                "The scan was running but did not complete normally."
-            )
-
-            # Calculate duration if we have a started_at time
             if scan.started_at:
-                duration = (datetime.utcnow() - scan.started_at).total_seconds()
-                scan.duration = duration
-
-            logger.info(
-                f"Marked orphaned scan {scan.id} as failed "
-                f"(started: {scan.started_at.isoformat() if scan.started_at else 'unknown'})"
-            )
+                scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
 
         self.db.commit()
-        logger.info(f"Cleaned up {count} orphaned scan(s)")
+        logger.info(f"Cleaned up {count} orphaned scan(s): {recovered_count} recovered, {failed_count} failed")
 
-        return count
+        return {
+            'recovered': recovered_count,
+            'failed': failed_count,
+            'total': count
+        }
 
     def _save_scan_to_db(self, report: Dict[str, Any], scan_id: int,
                          status: str = 'completed', output_paths: Dict = None) -> None:
diff --git a/app/web/utils/validators.py b/app/web/utils/validators.py
index 5c355b2..30290c1 100644
--- a/app/web/utils/validators.py
+++ b/app/web/utils/validators.py
@@ -23,7 +23,7 @@ def validate_scan_status(status: str) -> tuple[bool, Optional[str]]:
         >>> validate_scan_status('invalid')
         (False, 'Invalid status: invalid. Must be one of: running, completed, failed')
     """
-    valid_statuses = ['running', 'completed', 'failed', 'cancelled']
+    valid_statuses = ['running', 'finalizing', 'completed', 'failed', 'cancelled']
 
     if status not in valid_statuses:
        return False, f'Invalid status: {status}. Must be one of: {", ".join(valid_statuses)}'
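
Reviewer note (not part of the patch): a quick way to exercise the new recovery
logic outside the web stack is to call _recover_orphaned_scan() directly with a
stub session and Scan rows that point at real or missing output files. The
sketch below is illustrative only; FakeSession, demo(), and the temporary paths
are hypothetical, the import paths assume the app package is importable as
app.web, and it assumes the Scan model exposes the columns the patch already
uses (json_path, html_path, zip_path, started_at, status, completed_at,
duration, error_message).

    # Minimal sketch (not part of this patch): drive _recover_orphaned_scan()
    # with a stub session instead of a real SQLAlchemy session.
    import tempfile
    from datetime import datetime
    from pathlib import Path

    from app.web.api.scans import _recover_orphaned_scan
    from app.web.models import Scan


    class FakeSession:
        """Stand-in for the SQLAlchemy session; commit() is a no-op."""
        def commit(self):
            pass


    def demo():
        with tempfile.TemporaryDirectory() as tmp:
            json_path = Path(tmp) / 'scan_report_20251125_120000.json'
            json_path.write_text('{}')  # pretend the scanner wrote its JSON report

            # Orphaned scan whose JSON output exists -> smart recovery to 'completed'
            with_files = Scan(id=1, status='finalizing',
                              started_at=datetime.utcnow(),
                              json_path=str(json_path))
            print(_recover_orphaned_scan(with_files, FakeSession()))

            # Orphaned scan with no outputs -> 'cancelled'
            without_files = Scan(id=2, status='running',
                                 started_at=datetime.utcnow())
            print(_recover_orphaned_scan(without_files, FakeSession()))


    if __name__ == '__main__':
        demo()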