Changes Made
1. app/web/utils/validators.py - Added 'finalizing' to valid_statuses list
2. app/web/models.py - Updated status field comment to document all valid statuses
3. app/web/jobs/scan_job.py
- Added transition to 'finalizing' status before output file generation
- Sets current_phase = 'generating_outputs' during this phase
- Wrapped output generation in try-except with proper error handling
- If output generation fails, scan is marked 'completed' with warning message (scan data is still valid)
4. app/web/api/scans.py
- Added _recover_orphaned_scan() helper function for smart recovery
- Modified stop_running_scan() to:
- Allow stopping scans with status 'running' OR 'finalizing'
- When scanner not in registry, perform smart recovery instead of returning 404
- Smart recovery checks for output files and marks as 'completed' if found, 'cancelled' if not
5. app/web/services/scan_service.py
- Enhanced cleanup_orphaned_scans() with smart recovery logic
- Now finds scans in both 'running' and 'finalizing' status
- Returns dict with stats: {'recovered': N, 'failed': N, 'total': N}
6. app/web/app.py - Updated caller to handle new dict return type from cleanup_orphaned_scans()
Expected Behavior Now
1. Normal scan flow: running → finalizing → completed
2. Stop on active scan: Sends cancel signal, becomes 'cancelled'
3. Stop on orphaned scan with files: Smart recovery → 'completed'
4. Stop on orphaned scan without files: → 'cancelled'
5. App restart with orphans: Startup cleanup uses smart recovery
This commit is contained in:
@@ -7,6 +7,9 @@ scan results.
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from flask import Blueprint, current_app, jsonify, request
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
@@ -20,6 +23,89 @@ bp = Blueprint('scans', __name__)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _recover_orphaned_scan(scan: Scan, session) -> dict:
|
||||
"""
|
||||
Recover an orphaned scan by checking for output files.
|
||||
|
||||
If output files exist: mark as 'completed' (smart recovery)
|
||||
If no output files: mark as 'cancelled'
|
||||
|
||||
Args:
|
||||
scan: The orphaned Scan object
|
||||
session: Database session
|
||||
|
||||
Returns:
|
||||
Dictionary with recovery result for API response
|
||||
"""
|
||||
# Check for existing output files
|
||||
output_exists = False
|
||||
output_files_found = []
|
||||
|
||||
# Check paths stored in database
|
||||
if scan.json_path and Path(scan.json_path).exists():
|
||||
output_exists = True
|
||||
output_files_found.append('json')
|
||||
if scan.html_path and Path(scan.html_path).exists():
|
||||
output_files_found.append('html')
|
||||
if scan.zip_path and Path(scan.zip_path).exists():
|
||||
output_files_found.append('zip')
|
||||
|
||||
# Also check by timestamp pattern if paths not stored yet
|
||||
if not output_exists and scan.started_at:
|
||||
output_dir = Path('/app/output')
|
||||
if output_dir.exists():
|
||||
timestamp_pattern = scan.started_at.strftime('%Y%m%d')
|
||||
for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
|
||||
output_exists = True
|
||||
output_files_found.append('json')
|
||||
# Update scan record with found paths
|
||||
scan.json_path = str(json_file)
|
||||
html_file = json_file.with_suffix('.html')
|
||||
if html_file.exists():
|
||||
scan.html_path = str(html_file)
|
||||
output_files_found.append('html')
|
||||
zip_file = json_file.with_suffix('.zip')
|
||||
if zip_file.exists():
|
||||
scan.zip_path = str(zip_file)
|
||||
output_files_found.append('zip')
|
||||
break
|
||||
|
||||
if output_exists:
|
||||
# Smart recovery: outputs exist, mark as completed
|
||||
scan.status = 'completed'
|
||||
scan.completed_at = datetime.utcnow()
|
||||
if scan.started_at:
|
||||
scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
|
||||
scan.error_message = None
|
||||
session.commit()
|
||||
|
||||
logger.info(f"Scan {scan.id}: Recovered as completed (files: {output_files_found})")
|
||||
|
||||
return {
|
||||
'scan_id': scan.id,
|
||||
'status': 'completed',
|
||||
'message': f'Scan recovered as completed (output files found: {", ".join(output_files_found)})',
|
||||
'recovery_type': 'smart_recovery'
|
||||
}
|
||||
else:
|
||||
# No outputs: mark as cancelled
|
||||
scan.status = 'cancelled'
|
||||
scan.completed_at = datetime.utcnow()
|
||||
if scan.started_at:
|
||||
scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
|
||||
scan.error_message = 'Scan process was interrupted before completion. No output files were generated.'
|
||||
session.commit()
|
||||
|
||||
logger.info(f"Scan {scan.id}: Marked as cancelled (orphaned, no output files)")
|
||||
|
||||
return {
|
||||
'scan_id': scan.id,
|
||||
'status': 'cancelled',
|
||||
'message': 'Orphaned scan cancelled (no output files found)',
|
||||
'recovery_type': 'orphan_cleanup'
|
||||
}
|
||||
|
||||
|
||||
@bp.route('', methods=['GET'])
|
||||
@api_auth_required
|
||||
def list_scans():
|
||||
@@ -247,18 +333,23 @@ def delete_scan(scan_id):
|
||||
@api_auth_required
|
||||
def stop_running_scan(scan_id):
|
||||
"""
|
||||
Stop a running scan.
|
||||
Stop a running scan with smart recovery for orphaned scans.
|
||||
|
||||
If the scan is actively running in the registry, sends a cancel signal.
|
||||
If the scan shows as running/finalizing but is not in the registry (orphaned),
|
||||
performs smart recovery: marks as 'completed' if output files exist,
|
||||
otherwise marks as 'cancelled'.
|
||||
|
||||
Args:
|
||||
scan_id: Scan ID to stop
|
||||
|
||||
Returns:
|
||||
JSON response with stop status
|
||||
JSON response with stop status or recovery result
|
||||
"""
|
||||
try:
|
||||
session = current_app.db_session
|
||||
|
||||
# Check if scan exists and is running
|
||||
# Check if scan exists
|
||||
scan = session.query(Scan).filter_by(id=scan_id).first()
|
||||
if not scan:
|
||||
logger.warning(f"Scan not found for stop request: {scan_id}")
|
||||
@@ -267,7 +358,8 @@ def stop_running_scan(scan_id):
|
||||
'message': f'Scan with ID {scan_id} not found'
|
||||
}), 404
|
||||
|
||||
if scan.status != 'running':
|
||||
# Allow stopping scans with status 'running' or 'finalizing'
|
||||
if scan.status not in ('running', 'finalizing'):
|
||||
logger.warning(f"Cannot stop scan {scan_id}: status is '{scan.status}'")
|
||||
return jsonify({
|
||||
'error': 'Invalid state',
|
||||
@@ -288,11 +380,11 @@ def stop_running_scan(scan_id):
|
||||
'status': 'stopping'
|
||||
}), 200
|
||||
else:
|
||||
logger.warning(f"Failed to stop scan {scan_id}: not found in running scanners")
|
||||
return jsonify({
|
||||
'error': 'Stop failed',
|
||||
'message': 'Scan not found in running scanners registry'
|
||||
}), 404
|
||||
# Scanner not in registry - this is an orphaned scan
|
||||
# Attempt smart recovery
|
||||
logger.warning(f"Scan {scan_id} not in registry, attempting smart recovery")
|
||||
recovery_result = _recover_orphaned_scan(scan, session)
|
||||
return jsonify(recovery_result), 200
|
||||
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Database error stopping scan {scan_id}: {str(e)}")
|
||||
|
||||
Reference in New Issue
Block a user