Changes Made

1. app/web/utils/validators.py - Added 'finalizing' to valid_statuses list
  2. app/web/models.py - Updated status field comment to document all valid statuses
  3. app/web/jobs/scan_job.py
  - Added transition to 'finalizing' status before output file generation
  - Sets current_phase = 'generating_outputs' during this phase
  - Wrapped output generation in try-except with proper error handling
  - If output generation fails, the scan is still marked 'completed' with a warning message (the scan data itself is still valid)

  4. app/web/api/scans.py
  - Added _recover_orphaned_scan() helper function for smart recovery
  - Modified stop_running_scan() to:
    - Allow stopping scans with status 'running' OR 'finalizing'
    - When scanner not in registry, perform smart recovery instead of returning 404
    - Smart recovery checks for output files and marks the scan as 'completed' if they are found, or 'cancelled' if not

  5. app/web/services/scan_service.py
  - Enhanced cleanup_orphaned_scans() with smart recovery logic
  - Now finds scans in both 'running' and 'finalizing' status
  - Returns dict with stats: {'recovered': N, 'failed': N, 'total': N}

  6. app/web/app.py - Updated caller to handle new dict return type from cleanup_orphaned_scans()

  Expected Behavior Now

  1. Normal scan flow: running → finalizing → completed
  2. Stop on active scan: Sends cancel signal, becomes 'cancelled'
  3. Stop on orphaned scan with files: Smart recovery → 'completed'
  4. Stop on orphaned scan without files: → 'cancelled'
  5. App restart with orphans: Startup cleanup uses smart recovery
This commit is contained in:
2025-11-25 14:47:36 -06:00
parent 07c2bcfd11
commit 847e05abbe
6 changed files with 220 additions and 48 deletions

View File

@@ -7,6 +7,9 @@ scan results.
import json
import logging
from datetime import datetime
from pathlib import Path
from flask import Blueprint, current_app, jsonify, request
from sqlalchemy.exc import SQLAlchemyError
@@ -20,6 +23,89 @@ bp = Blueprint('scans', __name__)
logger = logging.getLogger(__name__)
def _recover_orphaned_scan(scan: Scan, session) -> dict:
    """
    Recover an orphaned scan by checking for its output files.

    If a JSON report exists the scan is marked 'completed' (smart recovery);
    otherwise it is marked 'cancelled'. Only the JSON report decides the
    outcome: HTML/ZIP files stored on the record are listed in the result
    message when present, but do not by themselves count as output.

    The scan record is updated in place and the session is committed.

    Args:
        scan: The orphaned Scan object
        session: Database session

    Returns:
        Dictionary with recovery result for API response, with the keys
        'scan_id', 'status', 'message' and 'recovery_type'.
    """
    output_files_found = []

    def _record(kind):
        # Keep the list duplicate-free: a stored html/zip hit followed by the
        # directory fallback below would otherwise report the same kind twice.
        if kind not in output_files_found:
            output_files_found.append(kind)

    # Check paths stored in database
    for kind, stored_path in (
        ('json', scan.json_path),
        ('html', scan.html_path),
        ('zip', scan.zip_path),
    ):
        if stored_path and Path(stored_path).exists():
            _record(kind)

    output_exists = 'json' in output_files_found

    # Also check by timestamp pattern if paths not stored yet
    if not output_exists and scan.started_at:
        output_dir = Path('/app/output')
        if output_dir.exists():
            day_stamp = scan.started_at.strftime('%Y%m%d')
            # glob() yields entries in arbitrary order; sort so the chosen
            # report is deterministic when several match.
            # NOTE(review): the pattern only pins the start *day*, so a report
            # from another scan started the same day can match - confirm the
            # report filename format and tighten the pattern if possible.
            candidates = sorted(output_dir.glob(f'scan_report_{day_stamp}*.json'))
            if candidates:
                json_file = candidates[0]
                output_exists = True
                _record('json')
                # Update scan record with found paths
                scan.json_path = str(json_file)
                for kind in ('html', 'zip'):
                    sibling = json_file.with_suffix(f'.{kind}')
                    if sibling.exists():
                        setattr(scan, f'{kind}_path', str(sibling))
                        _record(kind)

    # Take a single timestamp so completed_at and duration agree with each
    # other. utcnow() is kept because it is subtracted from started_at, which
    # is presumably stored as naive UTC - TODO confirm against the model.
    finished_at = datetime.utcnow()
    scan.completed_at = finished_at
    if scan.started_at:
        scan.duration = (finished_at - scan.started_at).total_seconds()

    if output_exists:
        # Smart recovery: outputs exist, mark as completed
        scan.status = 'completed'
        scan.error_message = None
        session.commit()
        logger.info(f"Scan {scan.id}: Recovered as completed (files: {output_files_found})")
        return {
            'scan_id': scan.id,
            'status': 'completed',
            'message': f'Scan recovered as completed (output files found: {", ".join(output_files_found)})',
            'recovery_type': 'smart_recovery'
        }

    # No outputs: mark as cancelled
    scan.status = 'cancelled'
    scan.error_message = 'Scan process was interrupted before completion. No output files were generated.'
    session.commit()
    logger.info(f"Scan {scan.id}: Marked as cancelled (orphaned, no output files)")
    return {
        'scan_id': scan.id,
        'status': 'cancelled',
        'message': 'Orphaned scan cancelled (no output files found)',
        'recovery_type': 'orphan_cleanup'
    }
@bp.route('', methods=['GET'])
@api_auth_required
def list_scans():
@@ -247,18 +333,23 @@ def delete_scan(scan_id):
@api_auth_required
def stop_running_scan(scan_id):
"""
Stop a running scan.
Stop a running scan with smart recovery for orphaned scans.
If the scan is actively running in the registry, sends a cancel signal.
If the scan shows as running/finalizing but is not in the registry (orphaned),
performs smart recovery: marks as 'completed' if output files exist,
otherwise marks as 'cancelled'.
Args:
scan_id: Scan ID to stop
Returns:
JSON response with stop status
JSON response with stop status or recovery result
"""
try:
session = current_app.db_session
# Check if scan exists and is running
# Check if scan exists
scan = session.query(Scan).filter_by(id=scan_id).first()
if not scan:
logger.warning(f"Scan not found for stop request: {scan_id}")
@@ -267,7 +358,8 @@ def stop_running_scan(scan_id):
'message': f'Scan with ID {scan_id} not found'
}), 404
if scan.status != 'running':
# Allow stopping scans with status 'running' or 'finalizing'
if scan.status not in ('running', 'finalizing'):
logger.warning(f"Cannot stop scan {scan_id}: status is '{scan.status}'")
return jsonify({
'error': 'Invalid state',
@@ -288,11 +380,11 @@ def stop_running_scan(scan_id):
'status': 'stopping'
}), 200
else:
logger.warning(f"Failed to stop scan {scan_id}: not found in running scanners")
return jsonify({
'error': 'Stop failed',
'message': 'Scan not found in running scanners registry'
}), 404
# Scanner not in registry - this is an orphaned scan
# Attempt smart recovery
logger.warning(f"Scan {scan_id} not in registry, attempting smart recovery")
recovery_result = _recover_orphaned_scan(scan, session)
return jsonify(recovery_result), 200
except SQLAlchemyError as e:
logger.error(f"Database error stopping scan {scan_id}: {str(e)}")