Files
SneakyScan/app/web/services/schedule_service.py
Phillip Tarrant 5e3a70f837 Fix schedule management and update documentation for database-backed configs
This commit addresses multiple issues with schedule management and updates
  documentation to reflect the transition from YAML-based to database-backed
  configuration system.

  **Documentation Updates:**
  - Update DEPLOYMENT.md to remove all references to YAML config files
  - Document that all configurations are now stored in SQLite database
  - Update API examples to use config IDs instead of YAML filenames
  - Remove configs directory from backup/restore procedures
  - Update volume management section to reflect database-only storage

  **Cron Expression Handling:**
  - Add comprehensive documentation for APScheduler cron format conversion
  - Document that from_crontab() accepts standard format (Sunday=0) and converts automatically
  - Add validate_cron_expression() helper method with detailed error messages
  - Include helpful hints for day-of-week field errors in validation
  - Replace all deprecated datetime.utcnow() calls with datetime.now(timezone.utc)

  **Timezone-Aware DateTime Fixes:**
  - Fix "can't subtract offset-naive and offset-aware datetimes" error
  - Add timezone awareness to croniter.get_next() return values
  - Make _get_relative_time() defensive to handle both naive and aware datetimes
  - Ensure all datetime comparisons use timezone-aware objects

  **Schedule Edit UI Fixes:**
  - Fix JavaScript error "Cannot set properties of null (setting 'value')"
  - Change reference from non-existent 'config-id' to correct 'config-file' element
  - Add config_name field to schedule API responses for better UX
  - Eagerly load Schedule.config relationship using joinedload()
  - Fix AttributeError: use schedule.config.title instead of .name
  - Display config title and ID in schedule edit form

  **Technical Details:**
  - app/web/services/schedule_service.py: 6 datetime.utcnow() fixes, validation enhancements
  - app/web/services/scheduler_service.py: Documentation, validation, timezone fixes
  - app/web/templates/schedule_edit.html: JavaScript element reference fix
  - docs/DEPLOYMENT.md: Complete rewrite of config management sections

  Fixes scheduling for Sunday at midnight (cron: 0 0 * * 0)
  Fixes schedule edit page JavaScript errors
  Improves user experience with config title display
2025-11-24 12:53:06 -06:00

521 lines
16 KiB
Python

"""
Schedule service for managing scheduled scan operations.
This service handles the business logic for creating, updating, and managing
scheduled scans with cron expressions.
"""
import logging
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
from croniter import croniter
from sqlalchemy.orm import Session
from web.models import Schedule, Scan, ScanConfig
from web.utils.pagination import paginate, PaginatedResult
logger = logging.getLogger(__name__)
class ScheduleService:
    """
    Service for managing scheduled scans.

    Handles the schedule lifecycle (creation, lookup, update, deletion),
    cron expression validation, next-run calculation, and conversion of
    Schedule rows into plain dictionaries for API responses.
    """

    # Number of whitespace-separated fields in a standard crontab expression.
    _CRON_FIELD_COUNT = 5

    # (unit length in seconds, singular label), ordered largest first so the
    # first unit that fits is the coarsest one.
    _RELATIVE_UNITS = (
        (604800, "week"),
        (86400, "day"),
        (3600, "hour"),
        (60, "minute"),
    )

    def __init__(self, db_session: Session):
        """
        Initialize schedule service.

        Args:
            db_session: SQLAlchemy database session
        """
        self.db = db_session

    def create_schedule(
        self,
        name: str,
        config_id: int,
        cron_expression: str,
        enabled: bool = True
    ) -> int:
        """
        Create a new schedule.

        Args:
            name: Human-readable schedule name
            config_id: Database config ID
            cron_expression: Cron expression (e.g., '0 2 * * *')
            enabled: Whether schedule is active

        Returns:
            Schedule ID of the created schedule

        Raises:
            ValueError: If cron expression is invalid or config doesn't exist
        """
        is_valid, error_msg = self.validate_cron_expression(cron_expression)
        if not is_valid:
            raise ValueError(f"Invalid cron expression: {error_msg}")

        self._require_config(config_id)

        # Disabled schedules have no upcoming run.
        next_run = self.calculate_next_run(cron_expression) if enabled else None

        now_utc = datetime.now(timezone.utc)
        schedule = Schedule(
            name=name,
            config_id=config_id,
            cron_expression=cron_expression,
            enabled=enabled,
            last_run=None,
            next_run=next_run,
            created_at=now_utc,
            updated_at=now_utc
        )
        self.db.add(schedule)
        self.db.commit()
        # Refresh so the database-generated primary key is populated.
        self.db.refresh(schedule)

        logger.info(
            "Schedule %s created: '%s' with cron '%s'",
            schedule.id, name, cron_expression
        )
        return schedule.id

    def get_schedule(self, schedule_id: int) -> Dict[str, Any]:
        """
        Get schedule details by ID.

        Args:
            schedule_id: Schedule ID

        Returns:
            Schedule dictionary with details and a 'history' list holding
            up to 10 of the most recent scans triggered by the schedule

        Raises:
            ValueError: If schedule not found
        """
        schedule = self._require_schedule(schedule_id, with_config=True)
        schedule_dict = self._schedule_to_dict(schedule)
        schedule_dict['history'] = self.get_schedule_history(schedule_id, limit=10)
        return schedule_dict

    def list_schedules(
        self,
        page: int = 1,
        per_page: int = 20,
        enabled_filter: Optional[bool] = None
    ) -> Dict[str, Any]:
        """
        List all schedules with pagination and filtering.

        Args:
            page: Page number (1-indexed)
            per_page: Items per page
            enabled_filter: Filter by enabled status (None = all)

        Returns:
            Dictionary with paginated schedules:
            {
                'schedules': [...],
                'total': int,
                'page': int,
                'per_page': int,
                'pages': int
            }
        """
        from sqlalchemy.orm import joinedload

        # Eagerly load the config relationship; _schedule_to_dict reads it
        # for every row.
        query = self.db.query(Schedule).options(joinedload(Schedule.config))

        if enabled_filter is not None:
            query = query.filter(Schedule.enabled == enabled_filter)

        # Order by next_run (nulls last), then by name.
        query = query.order_by(
            Schedule.next_run.is_(None),
            Schedule.next_run,
            Schedule.name
        )

        result = paginate(query, page=page, per_page=per_page)
        return {
            'schedules': [self._schedule_to_dict(item) for item in result.items],
            'total': result.total,
            'page': result.page,
            'per_page': result.per_page,
            'pages': result.pages
        }

    def update_schedule(
        self,
        schedule_id: int,
        **updates: Any
    ) -> Dict[str, Any]:
        """
        Update schedule fields.

        next_run is derived automatically: it is recalculated when the
        schedule ends up enabled and either the cron expression changed or
        the schedule was previously disabled, and it is cleared when an
        enabled schedule is disabled.

        Args:
            schedule_id: Schedule ID
            **updates: Fields to update (name, config_id, cron_expression,
                enabled). Keys that are not attributes of the Schedule
                model are silently ignored.

        Returns:
            Updated schedule dictionary

        Raises:
            ValueError: If schedule not found or invalid updates
        """
        schedule = self._require_schedule(schedule_id)

        cron_changed = 'cron_expression' in updates
        if cron_changed:
            is_valid, error_msg = self.validate_cron_expression(updates['cron_expression'])
            if not is_valid:
                raise ValueError(f"Invalid cron expression: {error_msg}")

        if 'config_id' in updates:
            self._require_config(updates['config_id'])

        was_enabled = schedule.enabled
        now_enabled = updates['enabled'] if 'enabled' in updates else was_enabled

        if now_enabled and (cron_changed or not was_enabled):
            # New cron on an active schedule, or schedule being switched on.
            cron_expr = updates.get('cron_expression', schedule.cron_expression)
            updates['next_run'] = self.calculate_next_run(cron_expr)
        elif was_enabled and not now_enabled:
            # Being disabled - no upcoming run.
            updates['next_run'] = None

        # NOTE(review): any existing attribute name is accepted here, not
        # only the documented fields - confirm callers filter their input.
        for key, value in updates.items():
            if hasattr(schedule, key):
                setattr(schedule, key, value)

        schedule.updated_at = datetime.now(timezone.utc)
        self.db.commit()
        self.db.refresh(schedule)

        logger.info("Schedule %s updated: %s", schedule_id, list(updates))
        return self._schedule_to_dict(schedule)

    def delete_schedule(self, schedule_id: int) -> bool:
        """
        Delete a schedule.

        Note: Associated scans are expected NOT to be deleted (their
        schedule_id becoming null); that depends on the model's foreign-key
        configuration, which is defined outside this module.

        Args:
            schedule_id: Schedule ID

        Returns:
            True if deleted successfully

        Raises:
            ValueError: If schedule not found
        """
        schedule = self._require_schedule(schedule_id)
        # Capture the name before deletion so it can still be logged.
        schedule_name = schedule.name

        self.db.delete(schedule)
        self.db.commit()

        logger.info("Schedule %s ('%s') deleted", schedule_id, schedule_name)
        return True

    def toggle_enabled(self, schedule_id: int, enabled: bool) -> Dict[str, Any]:
        """
        Enable or disable a schedule.

        Thin wrapper over update_schedule(), so next_run is recalculated or
        cleared by the same rules.

        Args:
            schedule_id: Schedule ID
            enabled: New enabled status

        Returns:
            Updated schedule dictionary

        Raises:
            ValueError: If schedule not found
        """
        return self.update_schedule(schedule_id, enabled=enabled)

    def update_run_times(
        self,
        schedule_id: int,
        last_run: datetime,
        next_run: datetime
    ) -> bool:
        """
        Update last_run and next_run timestamps.

        Intended to be called after each execution.

        Args:
            schedule_id: Schedule ID
            last_run: Last execution time
            next_run: Next scheduled execution time

        Returns:
            True if updated successfully

        Raises:
            ValueError: If schedule not found
        """
        schedule = self._require_schedule(schedule_id)

        schedule.last_run = last_run
        schedule.next_run = next_run
        schedule.updated_at = datetime.now(timezone.utc)
        self.db.commit()

        logger.debug(
            "Schedule %s run times updated: last=%s, next=%s",
            schedule_id, last_run, next_run
        )
        return True

    def validate_cron_expression(self, cron_expr: str) -> Tuple[bool, Optional[str]]:
        """
        Validate a cron expression.

        Args:
            cron_expr: Cron expression to validate in standard crontab format
                Format: minute hour day month day_of_week
                Day of week: 0=Sunday, 1=Monday, ..., 6=Saturday

        Returns:
            Tuple of (is_valid, error_message)
            - (True, None) if valid
            - (False, error_message) if invalid

        Note:
            Validation is performed with croniter. Converting the expression
            for the scheduler backend is not done here; the commit notes say
            APScheduler's from_crontab() handles that when the schedule is
            registered.
        """
        if not isinstance(cron_expr, str):
            return (False, "Cron expression must be a string")

        # Check the shape first: croniter also accepts 6- and 7-field
        # expressions, which this service does not support.
        fields = cron_expr.split()
        if len(fields) != self._CRON_FIELD_COUNT:
            return (False, f"Cron expression must have 5 fields (minute hour day month day_of_week), got {len(fields)}")

        try:
            # Asking for the next occurrence forces croniter to fully
            # evaluate the expression.
            croniter(cron_expr, datetime.now(timezone.utc)).get_next(datetime)
        except (ValueError, KeyError) as e:
            error_msg = str(e)
            # Add helpful hint for day_of_week errors
            if "day" in error_msg.lower():
                hint = "\nNote: Use standard crontab format where 0=Sunday, 1=Monday, ..., 6=Saturday"
                return (False, f"{error_msg}{hint}")
            return (False, error_msg)
        except Exception as e:
            # This method reports problems through its return value, so any
            # other failure is converted instead of propagated.
            return (False, f"Unexpected error: {str(e)}")

        return (True, None)

    def calculate_next_run(
        self,
        cron_expr: str,
        from_time: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next run time from cron expression.

        Args:
            cron_expr: Cron expression
            from_time: Base time (defaults to now UTC)

        Returns:
            Next run datetime (UTC, timezone-aware)

        Raises:
            ValueError: If cron expression is invalid; the underlying error
                is attached as __cause__
        """
        if from_time is None:
            from_time = datetime.now(timezone.utc)

        try:
            next_run = croniter(cron_expr, from_time).get_next(datetime)
        except Exception as e:
            raise ValueError(f"Invalid cron expression '{cron_expr}': {str(e)}") from e

        # The result may be naive (e.g. when from_time was naive); all
        # calculations here are in UTC, so tag it as UTC.
        if next_run.tzinfo is None:
            next_run = next_run.replace(tzinfo=timezone.utc)
        return next_run

    def get_schedule_history(
        self,
        schedule_id: int,
        limit: int = 10
    ) -> List[Dict[str, Any]]:
        """
        Get recent scans triggered by this schedule.

        Args:
            schedule_id: Schedule ID
            limit: Maximum number of scans to return

        Returns:
            List of scan dictionaries (recent first)
        """
        scans = (
            self.db.query(Scan)
            .filter(Scan.schedule_id == schedule_id)
            .order_by(Scan.timestamp.desc())
            .limit(limit)
            .all()
        )
        return [
            {
                'id': scan.id,
                'timestamp': self._isoformat_or_none(scan.timestamp),
                'status': scan.status,
                'title': scan.title,
                'config_id': scan.config_id
            }
            for scan in scans
        ]

    def _require_schedule(self, schedule_id: int, with_config: bool = False) -> Schedule:
        """
        Fetch a schedule by ID or raise.

        Args:
            schedule_id: Schedule ID
            with_config: Eagerly load the Schedule.config relationship

        Returns:
            The matching Schedule instance

        Raises:
            ValueError: If schedule not found
        """
        query = self.db.query(Schedule)
        if with_config:
            from sqlalchemy.orm import joinedload
            query = query.options(joinedload(Schedule.config))

        schedule = query.filter(Schedule.id == schedule_id).first()
        if schedule is None:
            raise ValueError(f"Schedule {schedule_id} not found")
        return schedule

    def _require_config(self, config_id: int) -> ScanConfig:
        """
        Fetch a scan config by ID or raise.

        Args:
            config_id: Database config ID

        Returns:
            The matching ScanConfig instance

        Raises:
            ValueError: If config not found
        """
        db_config = self.db.query(ScanConfig).filter_by(id=config_id).first()
        if not db_config:
            raise ValueError(f"Config with ID {config_id} not found")
        return db_config

    @staticmethod
    def _isoformat_or_none(value: Optional[datetime]) -> Optional[str]:
        """Return value.isoformat(), or None when value is unset."""
        return value.isoformat() if value else None

    def _schedule_to_dict(self, schedule: Schedule) -> Dict[str, Any]:
        """
        Convert Schedule model to dictionary.

        Args:
            schedule: Schedule model instance

        Returns:
            Dictionary representation. Datetimes are ISO-8601 strings (or
            None), 'config_name' is the related config's title (or None when
            there is no related config), and 'next_run_relative' is a
            human-readable rendering of next_run.
        """
        config_name = schedule.config.title if schedule.config else None
        iso = self._isoformat_or_none
        return {
            'id': schedule.id,
            'name': schedule.name,
            'config_id': schedule.config_id,
            'config_name': config_name,
            'cron_expression': schedule.cron_expression,
            'enabled': schedule.enabled,
            'last_run': iso(schedule.last_run),
            'next_run': iso(schedule.next_run),
            'next_run_relative': self._get_relative_time(schedule.next_run),
            'created_at': iso(schedule.created_at),
            'updated_at': iso(schedule.updated_at)
        }

    def _get_relative_time(self, dt: Optional[datetime]) -> Optional[str]:
        """
        Format datetime as relative time.

        The largest whole unit that fits (weeks, days, hours, minutes) is
        used; anything under a minute is reported as "less than a minute".

        Args:
            dt: Datetime to format (can be naive or aware; naive values are
                assumed to be UTC)

        Returns:
            Human-readable relative time (e.g., "in 2 hours",
            "3 days ago"), or None if dt is None
        """
        if dt is None:
            return None

        # Both operands must be timezone-aware for the subtraction.
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)

        delta_seconds = (dt - datetime.now(timezone.utc)).total_seconds()
        in_future = delta_seconds > 0
        seconds = int(abs(delta_seconds))

        for unit_seconds, label in self._RELATIVE_UNITS:
            if seconds >= unit_seconds:
                count = seconds // unit_seconds
                phrase = f"{count} {label}{'s' if count != 1 else ''}"
                return f"in {phrase}" if in_future else f"{phrase} ago"

        return "in less than a minute" if in_future else "less than a minute ago"