Refactor: replace file-based config_file references with database-backed config_id

This commit is contained in:
2025-11-19 20:29:14 -06:00
parent b2e6efb4b3
commit 41ba4c47b5
34 changed files with 463 additions and 536 deletions

View File

@@ -58,7 +58,7 @@ class AlertService:
for rule in rules:
try:
# Check if rule applies to this scan's config
if rule.config_file and scan.config_file != rule.config_file:
if rule.config_id and scan.config_id != rule.config_id:
logger.debug(f"Skipping rule {rule.id} - config mismatch")
continue
@@ -178,10 +178,10 @@ class AlertService:
"""
alerts_to_create = []
# Find previous scan with same config_file
# Find previous scan with same config_id
previous_scan = (
self.db.query(Scan)
.filter(Scan.config_file == scan.config_file)
.filter(Scan.config_id == scan.config_id)
.filter(Scan.id < scan.id)
.filter(Scan.status == 'completed')
.order_by(Scan.started_at.desc() if Scan.started_at else Scan.timestamp.desc())
@@ -189,7 +189,7 @@ class AlertService:
)
if not previous_scan:
logger.info(f"No previous scan found for config {scan.config_file}")
logger.info(f"No previous scan found for config_id {scan.config_id}")
return []
try:

View File

@@ -654,23 +654,13 @@ class ConfigService:
# Build full path for comparison
config_path = os.path.join(self.configs_dir, filename)
# Find and delete all schedules using this config (enabled or disabled)
# Note: This function is deprecated. Schedules now use config_id.
# This code path should not be reached for new configs.
deleted_schedules = []
for schedule in schedules:
schedule_config = schedule.get('config_file', '')
# Handle both absolute paths and just filenames
if schedule_config == filename or schedule_config == config_path:
schedule_id = schedule.get('id')
schedule_name = schedule.get('name', 'Unknown')
try:
schedule_service.delete_schedule(schedule_id)
deleted_schedules.append(schedule_name)
except Exception as e:
import logging
logging.getLogger(__name__).warning(
f"Failed to delete schedule {schedule_id} ('{schedule_name}'): {e}"
)
import logging
logging.getLogger(__name__).warning(
f"delete_config_file called for '{filename}' - this is deprecated. Use database configs with config_id instead."
)
if deleted_schedules:
import logging
@@ -841,18 +831,9 @@ class ConfigService:
# Build full path for comparison
config_path = os.path.join(self.configs_dir, filename)
# Find schedules using this config (only enabled schedules)
using_schedules = []
for schedule in schedules:
schedule_config = schedule.get('config_file', '')
# Handle both absolute paths and just filenames
if schedule_config == filename or schedule_config == config_path:
# Only count enabled schedules
if schedule.get('enabled', False):
using_schedules.append(schedule.get('name', 'Unknown'))
return using_schedules
# Note: This function is deprecated. Schedules now use config_id.
# Return empty list as schedules no longer use config_file.
return []
except ImportError:
# If ScheduleService doesn't exist yet, return empty list

View File

@@ -19,7 +19,7 @@ from web.models import (
ScanCertificate, ScanTLSVersion, Site, ScanSiteAssociation
)
from web.utils.pagination import paginate, PaginatedResult
from web.utils.validators import validate_config_file, validate_scan_status
from web.utils.validators import validate_scan_status
logger = logging.getLogger(__name__)
@@ -41,7 +41,7 @@ class ScanService:
"""
self.db = db_session
def trigger_scan(self, config_file: str = None, config_id: int = None,
def trigger_scan(self, config_id: int,
triggered_by: str = 'manual', schedule_id: Optional[int] = None,
scheduler=None) -> int:
"""
@@ -51,8 +51,7 @@ class ScanService:
queues the scan for background execution.
Args:
config_file: Path to YAML configuration file (legacy, optional)
config_id: Database config ID (preferred, optional)
config_id: Database config ID
triggered_by: Source that triggered scan (manual, scheduled, api)
schedule_id: Optional schedule ID if triggered by schedule
scheduler: Optional SchedulerService instance for queuing background jobs
@@ -61,106 +60,48 @@ class ScanService:
Scan ID of the created scan
Raises:
ValueError: If config is invalid or both/neither config_file and config_id provided
ValueError: If config is invalid
"""
# Validate that exactly one config source is provided
if not (bool(config_file) ^ bool(config_id)):
raise ValueError("Must provide exactly one of config_file or config_id")
from web.models import ScanConfig
# Handle database config
if config_id:
from web.models import ScanConfig
# Validate config exists
db_config = self.db.query(ScanConfig).filter_by(id=config_id).first()
if not db_config:
raise ValueError(f"Config with ID {config_id} not found")
# Validate config exists
db_config = self.db.query(ScanConfig).filter_by(id=config_id).first()
if not db_config:
raise ValueError(f"Config with ID {config_id} not found")
# Create scan record with config_id
scan = Scan(
timestamp=datetime.utcnow(),
status='running',
config_id=config_id,
title=db_config.title,
triggered_by=triggered_by,
schedule_id=schedule_id,
created_at=datetime.utcnow()
)
# Create scan record with config_id
scan = Scan(
timestamp=datetime.utcnow(),
status='running',
config_id=config_id,
title=db_config.title,
triggered_by=triggered_by,
schedule_id=schedule_id,
created_at=datetime.utcnow()
)
self.db.add(scan)
self.db.commit()
self.db.refresh(scan)
self.db.add(scan)
self.db.commit()
self.db.refresh(scan)
logger.info(f"Scan {scan.id} triggered via {triggered_by} with config_id={config_id}")
logger.info(f"Scan {scan.id} triggered via {triggered_by} with config_id={config_id}")
# Queue background job if scheduler provided
if scheduler:
try:
job_id = scheduler.queue_scan(scan.id, config_id=config_id)
logger.info(f"Scan {scan.id} queued for background execution (job_id={job_id})")
except Exception as e:
logger.error(f"Failed to queue scan {scan.id}: {str(e)}")
# Mark scan as failed if job queuing fails
scan.status = 'failed'
scan.error_message = f"Failed to queue background job: {str(e)}"
self.db.commit()
raise
else:
logger.warning(f"Scan {scan.id} created but not queued (no scheduler provided)")
return scan.id
# Handle legacy YAML config file
# Queue background job if scheduler provided
if scheduler:
try:
job_id = scheduler.queue_scan(scan.id, config_id=config_id)
logger.info(f"Scan {scan.id} queued for background execution (job_id={job_id})")
except Exception as e:
logger.error(f"Failed to queue scan {scan.id}: {str(e)}")
# Mark scan as failed if job queuing fails
scan.status = 'failed'
scan.error_message = f"Failed to queue background job: {str(e)}"
self.db.commit()
raise
else:
# Validate config file
is_valid, error_msg = validate_config_file(config_file)
if not is_valid:
raise ValueError(f"Invalid config file: {error_msg}")
logger.warning(f"Scan {scan.id} created but not queued (no scheduler provided)")
# Convert config_file to full path if it's just a filename
if not config_file.startswith('/'):
config_path = f'/app/configs/{config_file}'
else:
config_path = config_file
# Load config to get title
import yaml
with open(config_path, 'r') as f:
config = yaml.safe_load(f)
# Create scan record
scan = Scan(
timestamp=datetime.utcnow(),
status='running',
config_file=config_file,
title=config.get('title', 'Untitled Scan'),
triggered_by=triggered_by,
schedule_id=schedule_id,
created_at=datetime.utcnow()
)
self.db.add(scan)
self.db.commit()
self.db.refresh(scan)
logger.info(f"Scan {scan.id} triggered via {triggered_by}")
# Queue background job if scheduler provided
if scheduler:
try:
job_id = scheduler.queue_scan(scan.id, config_file=config_file)
logger.info(f"Scan {scan.id} queued for background execution (job_id={job_id})")
except Exception as e:
logger.error(f"Failed to queue scan {scan.id}: {str(e)}")
# Mark scan as failed if job queuing fails
scan.status = 'failed'
scan.error_message = f"Failed to queue background job: {str(e)}"
self.db.commit()
raise
else:
logger.warning(f"Scan {scan.id} created but not queued (no scheduler provided)")
return scan.id
return scan.id
def get_scan(self, scan_id: int) -> Optional[Dict[str, Any]]:
"""
@@ -614,7 +555,7 @@ class ScanService:
'duration': scan.duration,
'status': scan.status,
'title': scan.title,
'config_file': scan.config_file,
'config_id': scan.config_id,
'json_path': scan.json_path,
'html_path': scan.html_path,
'zip_path': scan.zip_path,
@@ -640,7 +581,7 @@ class ScanService:
'duration': scan.duration,
'status': scan.status,
'title': scan.title,
'config_file': scan.config_file,
'config_id': scan.config_id,
'triggered_by': scan.triggered_by,
'created_at': scan.created_at.isoformat() if scan.created_at else None
}
@@ -783,17 +724,17 @@ class ScanService:
return None
# Check if scans use the same configuration
config1 = scan1.get('config_file', '')
config2 = scan2.get('config_file', '')
same_config = (config1 == config2) and (config1 != '')
config1 = scan1.get('config_id')
config2 = scan2.get('config_id')
same_config = (config1 == config2) and (config1 is not None)
# Generate warning message if configs differ
config_warning = None
if not same_config:
config_warning = (
f"These scans use different configurations. "
f"Scan #{scan1_id} used '{config1 or 'unknown'}' and "
f"Scan #{scan2_id} used '{config2 or 'unknown'}'. "
f"Scan #{scan1_id} used config_id={config1 or 'unknown'} and "
f"Scan #{scan2_id} used config_id={config2 or 'unknown'}. "
f"The comparison may show all changes as additions/removals if the scans "
f"cover different IP ranges or infrastructure."
)
@@ -832,14 +773,14 @@ class ScanService:
'timestamp': scan1['timestamp'],
'title': scan1['title'],
'status': scan1['status'],
'config_file': config1
'config_id': config1
},
'scan2': {
'id': scan2['id'],
'timestamp': scan2['timestamp'],
'title': scan2['title'],
'status': scan2['status'],
'config_file': config2
'config_id': config2
},
'same_config': same_config,
'config_warning': config_warning,

View File

@@ -6,14 +6,13 @@ scheduled scans with cron expressions.
"""
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from croniter import croniter
from sqlalchemy.orm import Session
from web.models import Schedule, Scan
from web.models import Schedule, Scan, ScanConfig
from web.utils.pagination import paginate, PaginatedResult
logger = logging.getLogger(__name__)
@@ -39,7 +38,7 @@ class ScheduleService:
def create_schedule(
self,
name: str,
config_file: str,
config_id: int,
cron_expression: str,
enabled: bool = True
) -> int:
@@ -48,7 +47,7 @@ class ScheduleService:
Args:
name: Human-readable schedule name
config_file: Path to YAML configuration file
config_id: Database config ID
cron_expression: Cron expression (e.g., '0 2 * * *')
enabled: Whether schedule is active
@@ -56,22 +55,17 @@ class ScheduleService:
Schedule ID of the created schedule
Raises:
ValueError: If cron expression is invalid or config file doesn't exist
ValueError: If cron expression is invalid or config doesn't exist
"""
# Validate cron expression
is_valid, error_msg = self.validate_cron_expression(cron_expression)
if not is_valid:
raise ValueError(f"Invalid cron expression: {error_msg}")
# Validate config file exists
# If config_file is just a filename, prepend the configs directory
if not config_file.startswith('/'):
config_file_path = os.path.join('/app/configs', config_file)
else:
config_file_path = config_file
if not os.path.isfile(config_file_path):
raise ValueError(f"Config file not found: {config_file}")
# Validate config exists
db_config = self.db.query(ScanConfig).filter_by(id=config_id).first()
if not db_config:
raise ValueError(f"Config with ID {config_id} not found")
# Calculate next run time
next_run = self.calculate_next_run(cron_expression) if enabled else None
@@ -79,7 +73,7 @@ class ScheduleService:
# Create schedule record
schedule = Schedule(
name=name,
config_file=config_file,
config_id=config_id,
cron_expression=cron_expression,
enabled=enabled,
last_run=None,
@@ -200,17 +194,11 @@ class ScheduleService:
if schedule.enabled or updates.get('enabled', False):
updates['next_run'] = self.calculate_next_run(updates['cron_expression'])
# Validate config file if being updated
if 'config_file' in updates:
config_file = updates['config_file']
# If config_file is just a filename, prepend the configs directory
if not config_file.startswith('/'):
config_file_path = os.path.join('/app/configs', config_file)
else:
config_file_path = config_file
if not os.path.isfile(config_file_path):
raise ValueError(f"Config file not found: {updates['config_file']}")
# Validate config_id if being updated
if 'config_id' in updates:
db_config = self.db.query(ScanConfig).filter_by(id=updates['config_id']).first()
if not db_config:
raise ValueError(f"Config with ID {updates['config_id']} not found")
# Handle enabled toggle
if 'enabled' in updates:
@@ -400,7 +388,7 @@ class ScheduleService:
'timestamp': scan.timestamp.isoformat() if scan.timestamp else None,
'status': scan.status,
'title': scan.title,
'config_file': scan.config_file
'config_id': scan.config_id
}
for scan in scans
]
@@ -418,7 +406,7 @@ class ScheduleService:
return {
'id': schedule.id,
'name': schedule.name,
'config_file': schedule.config_file,
'config_id': schedule.config_id,
'cron_expression': schedule.cron_expression,
'enabled': schedule.enabled,
'last_run': schedule.last_run.isoformat() if schedule.last_run else None,

View File

@@ -131,7 +131,7 @@ class SchedulerService:
try:
self.add_scheduled_scan(
schedule_id=schedule.id,
config_file=schedule.config_file,
config_id=schedule.config_id,
cron_expression=schedule.cron_expression
)
logger.info(f"Loaded schedule {schedule.id}: '{schedule.name}'")
@@ -149,16 +149,13 @@ class SchedulerService:
except Exception as e:
logger.error(f"Error loading schedules on startup: {str(e)}", exc_info=True)
def queue_scan(self, scan_id: int, config_file: str = None, config_id: int = None) -> str:
def queue_scan(self, scan_id: int, config_id: int) -> str:
"""
Queue a scan for immediate background execution.
Args:
scan_id: Database ID of the scan
config_file: Path to YAML configuration file (legacy, optional)
config_id: Database config ID (preferred, optional)
Note: Provide exactly one of config_file or config_id
config_id: Database config ID
Returns:
Job ID from APScheduler
@@ -172,7 +169,7 @@ class SchedulerService:
# Add job to run immediately
job = self.scheduler.add_job(
func=execute_scan,
kwargs={'scan_id': scan_id, 'config_file': config_file, 'config_id': config_id, 'db_url': self.db_url},
kwargs={'scan_id': scan_id, 'config_id': config_id, 'db_url': self.db_url},
id=f'scan_{scan_id}',
name=f'Scan {scan_id}',
replace_existing=True,
@@ -182,14 +179,14 @@ class SchedulerService:
logger.info(f"Queued scan {scan_id} for background execution (job_id={job.id})")
return job.id
def add_scheduled_scan(self, schedule_id: int, config_file: str,
def add_scheduled_scan(self, schedule_id: int, config_id: int,
cron_expression: str) -> str:
"""
Add a recurring scheduled scan.
Args:
schedule_id: Database ID of the schedule
config_file: Path to YAML configuration file
config_id: Database config ID
cron_expression: Cron expression (e.g., "0 2 * * *" for 2am daily)
Returns:
@@ -286,14 +283,14 @@ class SchedulerService:
# Create and trigger scan
scan_service = ScanService(session)
scan_id = scan_service.trigger_scan(
config_file=schedule['config_file'],
config_id=schedule['config_id'],
triggered_by='scheduled',
schedule_id=schedule_id,
scheduler=None # Don't pass scheduler to avoid recursion
)
# Queue the scan for execution
self.queue_scan(scan_id, schedule['config_file'])
self.queue_scan(scan_id, schedule['config_id'])
# Update schedule's last_run and next_run
from croniter import croniter

View File

@@ -87,7 +87,7 @@ class TemplateService:
"timestamp": scan.timestamp,
"duration": scan.duration,
"status": scan.status,
"config_file": scan.config_file,
"config_id": scan.config_id,
"triggered_by": scan.triggered_by,
"started_at": scan.started_at,
"completed_at": scan.completed_at,
@@ -247,7 +247,7 @@ class TemplateService:
"timestamp": datetime.utcnow(),
"duration": 125.5,
"status": "completed",
"config_file": "production-scan.yaml",
"config_id": 1,
"triggered_by": "schedule",
"started_at": datetime.utcnow(),
"completed_at": datetime.utcnow(),