Changes Made

1. app/web/utils/validators.py - Added 'finalizing' to valid_statuses list
  2. app/web/models.py - Updated status field comment to document all valid statuses
  3. app/web/jobs/scan_job.py
  - Added transition to 'finalizing' status before output file generation
  - Sets current_phase = 'generating_outputs' during this phase
  - Wrapped output generation in try-except with proper error handling
  - If output generation fails, the scan is still marked 'completed' with a warning message, because the scan data itself remains valid

  4. app/web/api/scans.py
  - Added _recover_orphaned_scan() helper function for smart recovery
  - Modified stop_running_scan() to:
    - Allow stopping scans with status 'running' OR 'finalizing'
    - When the scanner is not in the registry, perform smart recovery instead of returning 404
    - Smart recovery checks for output files and marks the scan 'completed' if they are found, 'cancelled' if not

  5. app/web/services/scan_service.py
  - Enhanced cleanup_orphaned_scans() with smart recovery logic: scans with existing output files are marked 'completed', scans without are marked 'failed'
  - Now finds scans in both 'running' and 'finalizing' status
  - Returns dict with stats: {'recovered': N, 'failed': N, 'total': N}

  6. app/web/app.py - Updated caller to handle new dict return type from cleanup_orphaned_scans()

  Expected Behavior Now

  1. Normal scan flow: running → finalizing → completed
  2. Stop on active scan: sends a cancel signal; the scan becomes 'cancelled'
  3. Stop on orphaned scan with files: Smart recovery → 'completed'
  4. Stop on orphaned scan without files: → 'cancelled'
  5. App restart with orphans: startup cleanup uses smart recovery → 'completed' if output files exist, otherwise 'failed' (not 'cancelled' as in the stop path)
This commit is contained in:
2025-11-25 14:47:36 -06:00
parent 07c2bcfd11
commit 847e05abbe
6 changed files with 220 additions and 48 deletions

View File

@@ -286,52 +286,96 @@ class ScanService:
return [self._scan_to_summary_dict(scan) for scan in scans]
def cleanup_orphaned_scans(self) -> int:
def cleanup_orphaned_scans(self) -> dict:
"""
Clean up orphaned scans that are stuck in 'running' status.
Clean up orphaned scans with smart recovery.
For scans stuck in 'running' or 'finalizing' status:
- If output files exist: mark as 'completed' (smart recovery)
- If no output files: mark as 'failed'
This should be called on application startup to handle scans that
were running when the system crashed or was restarted.
Scans in 'running' status are marked as 'failed' with an appropriate
error message indicating they were orphaned.
Returns:
Number of orphaned scans cleaned up
Dictionary with cleanup results: {'recovered': N, 'failed': N, 'total': N}
"""
# Find all scans with status='running'
orphaned_scans = self.db.query(Scan).filter(Scan.status == 'running').all()
# Find all scans with status='running' or 'finalizing'
orphaned_scans = self.db.query(Scan).filter(
Scan.status.in_(['running', 'finalizing'])
).all()
if not orphaned_scans:
logger.info("No orphaned scans found")
return 0
return {'recovered': 0, 'failed': 0, 'total': 0}
count = len(orphaned_scans)
logger.warning(f"Found {count} orphaned scan(s) in 'running' status, marking as failed")
logger.warning(f"Found {count} orphaned scan(s), attempting smart recovery")
recovered_count = 0
failed_count = 0
output_dir = Path('/app/output')
# Mark each orphaned scan as failed
for scan in orphaned_scans:
scan.status = 'failed'
# Check for existing output files
output_exists = False
output_files_found = []
# Check paths stored in database
if scan.json_path and Path(scan.json_path).exists():
output_exists = True
output_files_found.append('json')
if scan.html_path and Path(scan.html_path).exists():
output_files_found.append('html')
if scan.zip_path and Path(scan.zip_path).exists():
output_files_found.append('zip')
# Also check by timestamp pattern if paths not stored yet
if not output_exists and scan.started_at and output_dir.exists():
timestamp_pattern = scan.started_at.strftime('%Y%m%d')
for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
output_exists = True
output_files_found.append('json')
# Update scan record with found paths
scan.json_path = str(json_file)
html_file = json_file.with_suffix('.html')
if html_file.exists():
scan.html_path = str(html_file)
output_files_found.append('html')
zip_file = json_file.with_suffix('.zip')
if zip_file.exists():
scan.zip_path = str(zip_file)
output_files_found.append('zip')
break
if output_exists:
# Smart recovery: outputs exist, mark as completed
scan.status = 'completed'
scan.error_message = f'Recovered from orphaned state (output files found: {", ".join(output_files_found)})'
recovered_count += 1
logger.info(f"Recovered orphaned scan {scan.id} as completed (files: {output_files_found})")
else:
# No outputs: mark as failed
scan.status = 'failed'
scan.error_message = (
"Scan was interrupted by system shutdown or crash. "
"No output files were generated."
)
failed_count += 1
logger.info(f"Marked orphaned scan {scan.id} as failed (no output files)")
scan.completed_at = datetime.utcnow()
scan.error_message = (
"Scan was interrupted by system shutdown or crash. "
"The scan was running but did not complete normally."
)
# Calculate duration if we have a started_at time
if scan.started_at:
duration = (datetime.utcnow() - scan.started_at).total_seconds()
scan.duration = duration
logger.info(
f"Marked orphaned scan {scan.id} as failed "
f"(started: {scan.started_at.isoformat() if scan.started_at else 'unknown'})"
)
scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
self.db.commit()
logger.info(f"Cleaned up {count} orphaned scan(s)")
logger.info(f"Cleaned up {count} orphaned scan(s): {recovered_count} recovered, {failed_count} failed")
return count
return {
'recovered': recovered_count,
'failed': failed_count,
'total': count
}
def _save_scan_to_db(self, report: Dict[str, Any], scan_id: int,
status: str = 'completed', output_paths: Dict = None) -> None: