nightly #9

Merged
ptarrant merged 2 commits from nightly into beta 2025-11-25 20:49:25 +00:00
6 changed files with 220 additions and 48 deletions
Showing only changes of commit 9e2fc348b7

View File

@@ -7,6 +7,9 @@ scan results.
 import json
 import logging
+from datetime import datetime
+from pathlib import Path
 from flask import Blueprint, current_app, jsonify, request
 from sqlalchemy.exc import SQLAlchemyError
@@ -20,6 +23,89 @@ bp = Blueprint('scans', __name__)
 logger = logging.getLogger(__name__)
+
+
+def _recover_orphaned_scan(scan: Scan, session) -> dict:
+    """
+    Recover an orphaned scan by checking for output files.
+
+    If output files exist: mark as 'completed' (smart recovery)
+    If no output files: mark as 'cancelled'
+
+    Args:
+        scan: The orphaned Scan object
+        session: Database session
+
+    Returns:
+        Dictionary with recovery result for API response
+    """
+    # Check for existing output files
+    output_exists = False
+    output_files_found = []
+
+    # Check paths stored in database
+    if scan.json_path and Path(scan.json_path).exists():
+        output_exists = True
+        output_files_found.append('json')
+    if scan.html_path and Path(scan.html_path).exists():
+        output_files_found.append('html')
+    if scan.zip_path and Path(scan.zip_path).exists():
+        output_files_found.append('zip')
+
+    # Also check by timestamp pattern if paths not stored yet
+    if not output_exists and scan.started_at:
+        output_dir = Path('/app/output')
+        if output_dir.exists():
+            timestamp_pattern = scan.started_at.strftime('%Y%m%d')
+            for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
+                output_exists = True
+                output_files_found.append('json')
+                # Update scan record with found paths
+                scan.json_path = str(json_file)
+                html_file = json_file.with_suffix('.html')
+                if html_file.exists():
+                    scan.html_path = str(html_file)
+                    output_files_found.append('html')
+                zip_file = json_file.with_suffix('.zip')
+                if zip_file.exists():
+                    scan.zip_path = str(zip_file)
+                    output_files_found.append('zip')
+                break
+
+    if output_exists:
+        # Smart recovery: outputs exist, mark as completed
+        scan.status = 'completed'
+        scan.completed_at = datetime.utcnow()
+        if scan.started_at:
+            scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
+        scan.error_message = None
+        session.commit()
+        logger.info(f"Scan {scan.id}: Recovered as completed (files: {output_files_found})")
+        return {
+            'scan_id': scan.id,
+            'status': 'completed',
+            'message': f'Scan recovered as completed (output files found: {", ".join(output_files_found)})',
+            'recovery_type': 'smart_recovery'
+        }
+    else:
+        # No outputs: mark as cancelled
+        scan.status = 'cancelled'
+        scan.completed_at = datetime.utcnow()
+        if scan.started_at:
+            scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
+        scan.error_message = 'Scan process was interrupted before completion. No output files were generated.'
+        session.commit()
+        logger.info(f"Scan {scan.id}: Marked as cancelled (orphaned, no output files)")
+        return {
+            'scan_id': scan.id,
+            'status': 'cancelled',
+            'message': 'Orphaned scan cancelled (no output files found)',
+            'recovery_type': 'orphan_cleanup'
+        }
+
+
 @bp.route('', methods=['GET'])
 @api_auth_required
 def list_scans():
@@ -247,18 +333,23 @@ def delete_scan(scan_id):
 @api_auth_required
 def stop_running_scan(scan_id):
     """
-    Stop a running scan.
+    Stop a running scan with smart recovery for orphaned scans.
+
+    If the scan is actively running in the registry, sends a cancel signal.
+    If the scan shows as running/finalizing but is not in the registry (orphaned),
+    performs smart recovery: marks as 'completed' if output files exist,
+    otherwise marks as 'cancelled'.
 
     Args:
         scan_id: Scan ID to stop
 
     Returns:
-        JSON response with stop status
+        JSON response with stop status or recovery result
     """
     try:
         session = current_app.db_session
 
-        # Check if scan exists and is running
+        # Check if scan exists
         scan = session.query(Scan).filter_by(id=scan_id).first()
         if not scan:
             logger.warning(f"Scan not found for stop request: {scan_id}")
@@ -267,7 +358,8 @@ def stop_running_scan(scan_id):
                 'message': f'Scan with ID {scan_id} not found'
             }), 404
 
-        if scan.status != 'running':
+        # Allow stopping scans with status 'running' or 'finalizing'
+        if scan.status not in ('running', 'finalizing'):
             logger.warning(f"Cannot stop scan {scan_id}: status is '{scan.status}'")
             return jsonify({
                 'error': 'Invalid state',
@@ -288,11 +380,11 @@
                 'status': 'stopping'
             }), 200
         else:
-            logger.warning(f"Failed to stop scan {scan_id}: not found in running scanners")
-            return jsonify({
-                'error': 'Stop failed',
-                'message': 'Scan not found in running scanners registry'
-            }), 404
+            # Scanner not in registry - this is an orphaned scan
+            # Attempt smart recovery
+            logger.warning(f"Scan {scan_id} not in registry, attempting smart recovery")
+            recovery_result = _recover_orphaned_scan(scan, session)
+            return jsonify(recovery_result), 200
 
     except SQLAlchemyError as e:
         logger.error(f"Database error stopping scan {scan_id}: {str(e)}")

View File

@@ -307,9 +307,12 @@ def init_scheduler(app: Flask) -> None:
     with app.app_context():
         # Clean up any orphaned scans from previous crashes/restarts
         scan_service = ScanService(app.db_session)
-        orphaned_count = scan_service.cleanup_orphaned_scans()
-        if orphaned_count > 0:
-            app.logger.warning(f"Cleaned up {orphaned_count} orphaned scan(s) on startup")
+        cleanup_result = scan_service.cleanup_orphaned_scans()
+        if cleanup_result['total'] > 0:
+            app.logger.warning(
+                f"Cleaned up {cleanup_result['total']} orphaned scan(s) on startup: "
+                f"{cleanup_result['recovered']} recovered, {cleanup_result['failed']} failed"
+            )
 
         # Load all enabled schedules from database
         scheduler.load_schedules_on_startup()

View File

@@ -240,11 +240,44 @@ def execute_scan(scan_id: int, config_id: int, db_url: str = None):
         scan_duration = (end_time - start_time).total_seconds()
         logger.info(f"Scan {scan_id}: Scanner completed in {scan_duration:.2f} seconds")
 
-        # Generate output files (JSON, HTML, ZIP)
-        logger.info(f"Scan {scan_id}: Generating output files...")
-        output_paths = scanner.generate_outputs(report, timestamp)
+        # Transition to 'finalizing' status before output generation
+        try:
+            scan = session.query(Scan).filter_by(id=scan_id).first()
+            if scan:
+                scan.status = 'finalizing'
+                scan.current_phase = 'generating_outputs'
+                session.commit()
+                logger.info(f"Scan {scan_id}: Status changed to 'finalizing'")
+        except Exception as e:
+            logger.error(f"Scan {scan_id}: Failed to update status to finalizing: {e}")
+            session.rollback()
+
+        # Generate output files (JSON, HTML, ZIP) with error handling
+        output_paths = {}
+        output_generation_failed = False
+        try:
+            logger.info(f"Scan {scan_id}: Generating output files...")
+            output_paths = scanner.generate_outputs(report, timestamp)
+        except Exception as e:
+            output_generation_failed = True
+            logger.error(f"Scan {scan_id}: Output generation failed: {str(e)}")
+            logger.error(f"Scan {scan_id}: Traceback:\n{traceback.format_exc()}")
+            # Still mark scan as completed with warning since scan data is valid
+            try:
+                scan = session.query(Scan).filter_by(id=scan_id).first()
+                if scan:
+                    scan.status = 'completed'
+                    scan.error_message = f"Scan completed but output file generation failed: {str(e)}"
+                    scan.completed_at = datetime.utcnow()
+                    if scan.started_at:
+                        scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
+                    session.commit()
+                    logger.info(f"Scan {scan_id}: Marked as completed with output generation warning")
+            except Exception as db_error:
+                logger.error(f"Scan {scan_id}: Failed to update status after output error: {db_error}")
 
-        # Save results to database
-        logger.info(f"Scan {scan_id}: Saving results to database...")
-        scan_service = ScanService(session)
-        scan_service._save_scan_to_db(report, scan_id, status='completed', output_paths=output_paths)
+        # Save results to database (only if output generation succeeded)
+        if not output_generation_failed:
+            logger.info(f"Scan {scan_id}: Saving results to database...")
+            scan_service = ScanService(session)
+            scan_service._save_scan_to_db(report, scan_id, status='completed', output_paths=output_paths)
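
To keep the growing set of states straight while reviewing, here is a compact summary of the scan status lifecycle as this PR implies it. This is illustrative only, not code from the repo; the transition table is an assumption pieced together from the executor above, the stop endpoint, and the startup cleanup.

```python
# Sketch of the scan status lifecycle implied by this PR (not repo code).
SCAN_STATUS_TRANSITIONS = {
    # The executor moves running -> finalizing before output generation;
    # the stop endpoint and startup cleanup can move an orphaned 'running'
    # scan straight to completed, failed, or cancelled.
    'running':    {'finalizing', 'completed', 'failed', 'cancelled'},
    # If the process dies during 'finalizing', recovery paths still resolve it.
    'finalizing': {'completed', 'failed', 'cancelled'},
    'completed':  set(),   # terminal
    'failed':     set(),   # terminal
    'cancelled':  set(),   # terminal
}

def is_valid_transition(old: str, new: str) -> bool:
    """Return True if a status change is allowed under the model above."""
    return new in SCAN_STATUS_TRANSITIONS.get(old, set())
```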

View File

@@ -45,7 +45,7 @@ class Scan(Base):
     id = Column(Integer, primary_key=True, autoincrement=True)
     timestamp = Column(DateTime, nullable=False, index=True, comment="Scan start time (UTC)")
     duration = Column(Float, nullable=True, comment="Total scan duration in seconds")
-    status = Column(String(20), nullable=False, default='running', comment="running, completed, failed")
+    status = Column(String(20), nullable=False, default='running', comment="running, finalizing, completed, failed, cancelled")
     config_id = Column(Integer, ForeignKey('scan_configs.id'), nullable=True, index=True, comment="FK to scan_configs table")
     title = Column(Text, nullable=True, comment="Scan title from config")
     json_path = Column(Text, nullable=True, comment="Path to JSON report")

View File

@@ -286,52 +286,96 @@ class ScanService:
         return [self._scan_to_summary_dict(scan) for scan in scans]
 
-    def cleanup_orphaned_scans(self) -> int:
+    def cleanup_orphaned_scans(self) -> dict:
         """
-        Clean up orphaned scans that are stuck in 'running' status.
+        Clean up orphaned scans with smart recovery.
+
+        For scans stuck in 'running' or 'finalizing' status:
+        - If output files exist: mark as 'completed' (smart recovery)
+        - If no output files: mark as 'failed'
 
         This should be called on application startup to handle scans that
         were running when the system crashed or was restarted.
 
-        Scans in 'running' status are marked as 'failed' with an appropriate
-        error message indicating they were orphaned.
 
         Returns:
-            Number of orphaned scans cleaned up
+            Dictionary with cleanup results: {'recovered': N, 'failed': N, 'total': N}
         """
-        # Find all scans with status='running'
-        orphaned_scans = self.db.query(Scan).filter(Scan.status == 'running').all()
+        # Find all scans with status='running' or 'finalizing'
+        orphaned_scans = self.db.query(Scan).filter(
+            Scan.status.in_(['running', 'finalizing'])
+        ).all()
 
         if not orphaned_scans:
             logger.info("No orphaned scans found")
-            return 0
+            return {'recovered': 0, 'failed': 0, 'total': 0}
 
         count = len(orphaned_scans)
-        logger.warning(f"Found {count} orphaned scan(s) in 'running' status, marking as failed")
+        logger.warning(f"Found {count} orphaned scan(s), attempting smart recovery")
+
+        recovered_count = 0
+        failed_count = 0
+        output_dir = Path('/app/output')
 
-        # Mark each orphaned scan as failed
         for scan in orphaned_scans:
-            scan.status = 'failed'
-            scan.completed_at = datetime.utcnow()
-            scan.error_message = (
-                "Scan was interrupted by system shutdown or crash. "
-                "The scan was running but did not complete normally."
-            )
-
-            # Calculate duration if we have a started_at time
-            if scan.started_at:
-                duration = (datetime.utcnow() - scan.started_at).total_seconds()
-                scan.duration = duration
-
-            logger.info(
-                f"Marked orphaned scan {scan.id} as failed "
-                f"(started: {scan.started_at.isoformat() if scan.started_at else 'unknown'})"
-            )
+            # Check for existing output files
+            output_exists = False
+            output_files_found = []
+
+            # Check paths stored in database
+            if scan.json_path and Path(scan.json_path).exists():
+                output_exists = True
+                output_files_found.append('json')
+            if scan.html_path and Path(scan.html_path).exists():
+                output_files_found.append('html')
+            if scan.zip_path and Path(scan.zip_path).exists():
+                output_files_found.append('zip')
+
+            # Also check by timestamp pattern if paths not stored yet
+            if not output_exists and scan.started_at and output_dir.exists():
+                timestamp_pattern = scan.started_at.strftime('%Y%m%d')
+                for json_file in output_dir.glob(f'scan_report_{timestamp_pattern}*.json'):
+                    output_exists = True
+                    output_files_found.append('json')
+                    # Update scan record with found paths
+                    scan.json_path = str(json_file)
+                    html_file = json_file.with_suffix('.html')
+                    if html_file.exists():
+                        scan.html_path = str(html_file)
+                        output_files_found.append('html')
+                    zip_file = json_file.with_suffix('.zip')
+                    if zip_file.exists():
+                        scan.zip_path = str(zip_file)
+                        output_files_found.append('zip')
+                    break
+
+            if output_exists:
+                # Smart recovery: outputs exist, mark as completed
+                scan.status = 'completed'
+                scan.error_message = f'Recovered from orphaned state (output files found: {", ".join(output_files_found)})'
+                recovered_count += 1
+                logger.info(f"Recovered orphaned scan {scan.id} as completed (files: {output_files_found})")
+            else:
+                # No outputs: mark as failed
+                scan.status = 'failed'
+                scan.error_message = (
+                    "Scan was interrupted by system shutdown or crash. "
+                    "No output files were generated."
+                )
+                failed_count += 1
+                logger.info(f"Marked orphaned scan {scan.id} as failed (no output files)")
+
+            scan.completed_at = datetime.utcnow()
+            if scan.started_at:
+                scan.duration = (datetime.utcnow() - scan.started_at).total_seconds()
 
         self.db.commit()
-        logger.info(f"Cleaned up {count} orphaned scan(s)")
-        return count
+        logger.info(f"Cleaned up {count} orphaned scan(s): {recovered_count} recovered, {failed_count} failed")
+        return {
+            'recovered': recovered_count,
+            'failed': failed_count,
+            'total': count
+        }
 
     def _save_scan_to_db(self, report: Dict[str, Any], scan_id: int,
                          status: str = 'completed', output_paths: Dict = None) -> None:
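
Something like the following pytest sketch could pin down the smart-recovery path. The import paths, the `started_at` column, and the in-memory SQLite setup are assumptions; the assertions mirror the return shape and status handling in the hunk above.

```python
# Sketch only: app.models / app.services.scan_service import paths are assumed,
# as is the existence of a started_at column on Scan.
from datetime import datetime

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from app.models import Base, Scan                        # assumed module path
from app.services.scan_service import ScanService        # assumed module path


@pytest.fixture
def session():
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    db = sessionmaker(bind=engine)()
    yield db
    db.close()


def test_orphan_with_outputs_is_recovered(session, tmp_path):
    # A report file on disk means the orphan should be recovered as 'completed'.
    report = tmp_path / "scan_report_20251125_120000.json"
    report.write_text("{}")

    scan = Scan(
        timestamp=datetime.utcnow(),
        status="running",                 # orphaned: still 'running' after a crash
        started_at=datetime.utcnow(),     # assumes a started_at column exists
        json_path=str(report),
    )
    session.add(scan)
    session.commit()

    result = ScanService(session).cleanup_orphaned_scans()

    assert result == {"recovered": 1, "failed": 0, "total": 1}
    assert scan.status == "completed"
```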

View File

@@ -23,7 +23,7 @@ def validate_scan_status(status: str) -> tuple[bool, Optional[str]]:
     >>> validate_scan_status('invalid')
     (False, 'Invalid status: invalid. Must be one of: running, completed, failed')
     """
-    valid_statuses = ['running', 'completed', 'failed', 'cancelled']
+    valid_statuses = ['running', 'finalizing', 'completed', 'failed', 'cancelled']
     if status not in valid_statuses:
         return False, f'Invalid status: {status}. Must be one of: {", ".join(valid_statuses)}'
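
As a quick sanity check on the widened status list, the snippet below exercises the validator with the new 'finalizing' value. The import path is an assumption, and it presumes the valid branch returns `(True, None)` as the `tuple[bool, Optional[str]]` signature suggests.

```python
from app.utils.validators import validate_scan_status  # assumed module path

ok, err = validate_scan_status('finalizing')
assert ok is True and err is None  # assumes the valid branch returns (True, None)

ok, err = validate_scan_status('paused')
assert ok is False
print(err)
# Invalid status: paused. Must be one of: running, finalizing, completed, failed, cancelled
```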