This commit addresses multiple issues with schedule management and updates documentation to reflect the transition from YAML-based to database-backed configuration system. **Documentation Updates:** - Update DEPLOYMENT.md to remove all references to YAML config files - Document that all configurations are now stored in SQLite database - Update API examples to use config IDs instead of YAML filenames - Remove configs directory from backup/restore procedures - Update volume management section to reflect database-only storage **Cron Expression Handling:** - Add comprehensive documentation for APScheduler cron format conversion - Document that from_crontab() accepts standard format (Sunday=0) and converts automatically - Add validate_cron_expression() helper method with detailed error messages - Include helpful hints for day-of-week field errors in validation - Fix all deprecated datetime.utcnow() calls, replace with datetime.now(timezone.utc) **Timezone-Aware DateTime Fixes:** - Fix "can't subtract offset-naive and offset-aware datetimes" error - Add timezone awareness to croniter.get_next() return values - Make _get_relative_time() defensive to handle both naive and aware datetimes - Ensure all datetime comparisons use timezone-aware objects **Schedule Edit UI Fixes:** - Fix JavaScript error "Cannot set properties of null (setting 'value')" - Change reference from non-existent 'config-id' to correct 'config-file' element - Add config_name field to schedule API responses for better UX - Eagerly load Schedule.config relationship using joinedload() - Fix AttributeError: use schedule.config.title instead of .name - Display config title and ID in schedule edit form **Technical Details:** - app/web/services/schedule_service.py: 6 datetime.utcnow() fixes, validation enhancements - app/web/services/scheduler_service.py: Documentation, validation, timezone fixes - app/web/templates/schedule_edit.html: JavaScript element reference fix - docs/DEPLOYMENT.md: Complete rewrite of config management sections Fixes scheduling for Sunday at midnight (cron: 0 0 * * 0) Fixes schedule edit page JavaScript errors Improves user experience with config title display
521 lines
16 KiB
Python
521 lines
16 KiB
Python
"""
|
|
Schedule service for managing scheduled scan operations.
|
|
|
|
This service handles the business logic for creating, updating, and managing
|
|
scheduled scans with cron expressions.
|
|
"""
|
|
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from croniter import croniter
|
|
from sqlalchemy.orm import Session
|
|
|
|
from web.models import Schedule, Scan, ScanConfig
|
|
from web.utils.pagination import paginate, PaginatedResult
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ScheduleService:
|
|
"""
|
|
Service for managing scheduled scans.
|
|
|
|
Handles schedule lifecycle: creation, validation, updating,
|
|
and cron expression processing.
|
|
"""
|
|
|
|
def __init__(self, db_session: Session):
|
|
"""
|
|
Initialize schedule service.
|
|
|
|
Args:
|
|
db_session: SQLAlchemy database session
|
|
"""
|
|
self.db = db_session
|
|
|
|
def create_schedule(
|
|
self,
|
|
name: str,
|
|
config_id: int,
|
|
cron_expression: str,
|
|
enabled: bool = True
|
|
) -> int:
|
|
"""
|
|
Create a new schedule.
|
|
|
|
Args:
|
|
name: Human-readable schedule name
|
|
config_id: Database config ID
|
|
cron_expression: Cron expression (e.g., '0 2 * * *')
|
|
enabled: Whether schedule is active
|
|
|
|
Returns:
|
|
Schedule ID of the created schedule
|
|
|
|
Raises:
|
|
ValueError: If cron expression is invalid or config doesn't exist
|
|
"""
|
|
# Validate cron expression
|
|
is_valid, error_msg = self.validate_cron_expression(cron_expression)
|
|
if not is_valid:
|
|
raise ValueError(f"Invalid cron expression: {error_msg}")
|
|
|
|
# Validate config exists
|
|
db_config = self.db.query(ScanConfig).filter_by(id=config_id).first()
|
|
if not db_config:
|
|
raise ValueError(f"Config with ID {config_id} not found")
|
|
|
|
# Calculate next run time
|
|
next_run = self.calculate_next_run(cron_expression) if enabled else None
|
|
|
|
# Create schedule record
|
|
now_utc = datetime.now(timezone.utc)
|
|
schedule = Schedule(
|
|
name=name,
|
|
config_id=config_id,
|
|
cron_expression=cron_expression,
|
|
enabled=enabled,
|
|
last_run=None,
|
|
next_run=next_run,
|
|
created_at=now_utc,
|
|
updated_at=now_utc
|
|
)
|
|
|
|
self.db.add(schedule)
|
|
self.db.commit()
|
|
self.db.refresh(schedule)
|
|
|
|
logger.info(f"Schedule {schedule.id} created: '{name}' with cron '{cron_expression}'")
|
|
|
|
return schedule.id
|
|
|
|
def get_schedule(self, schedule_id: int) -> Dict[str, Any]:
|
|
"""
|
|
Get schedule details by ID.
|
|
|
|
Args:
|
|
schedule_id: Schedule ID
|
|
|
|
Returns:
|
|
Schedule dictionary with details and execution history
|
|
|
|
Raises:
|
|
ValueError: If schedule not found
|
|
"""
|
|
from sqlalchemy.orm import joinedload
|
|
|
|
schedule = (
|
|
self.db.query(Schedule)
|
|
.options(joinedload(Schedule.config))
|
|
.filter(Schedule.id == schedule_id)
|
|
.first()
|
|
)
|
|
|
|
if not schedule:
|
|
raise ValueError(f"Schedule {schedule_id} not found")
|
|
|
|
# Convert to dict and include history
|
|
schedule_dict = self._schedule_to_dict(schedule)
|
|
schedule_dict['history'] = self.get_schedule_history(schedule_id, limit=10)
|
|
|
|
return schedule_dict
|
|
|
|
def list_schedules(
|
|
self,
|
|
page: int = 1,
|
|
per_page: int = 20,
|
|
enabled_filter: Optional[bool] = None
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
List all schedules with pagination and filtering.
|
|
|
|
Args:
|
|
page: Page number (1-indexed)
|
|
per_page: Items per page
|
|
enabled_filter: Filter by enabled status (None = all)
|
|
|
|
Returns:
|
|
Dictionary with paginated schedules:
|
|
{
|
|
'schedules': [...],
|
|
'total': int,
|
|
'page': int,
|
|
'per_page': int,
|
|
'pages': int
|
|
}
|
|
"""
|
|
from sqlalchemy.orm import joinedload
|
|
|
|
# Build query and eagerly load config relationship
|
|
query = self.db.query(Schedule).options(joinedload(Schedule.config))
|
|
|
|
# Apply filter
|
|
if enabled_filter is not None:
|
|
query = query.filter(Schedule.enabled == enabled_filter)
|
|
|
|
# Order by next_run (nulls last), then by name
|
|
query = query.order_by(Schedule.next_run.is_(None), Schedule.next_run, Schedule.name)
|
|
|
|
# Paginate
|
|
result = paginate(query, page=page, per_page=per_page)
|
|
|
|
# Convert schedules to dicts
|
|
schedules = [self._schedule_to_dict(s) for s in result.items]
|
|
|
|
return {
|
|
'schedules': schedules,
|
|
'total': result.total,
|
|
'page': result.page,
|
|
'per_page': result.per_page,
|
|
'pages': result.pages
|
|
}
|
|
|
|
def update_schedule(
|
|
self,
|
|
schedule_id: int,
|
|
**updates: Any
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Update schedule fields.
|
|
|
|
Args:
|
|
schedule_id: Schedule ID
|
|
**updates: Fields to update (name, config_file, cron_expression, enabled)
|
|
|
|
Returns:
|
|
Updated schedule dictionary
|
|
|
|
Raises:
|
|
ValueError: If schedule not found or invalid updates
|
|
"""
|
|
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
|
|
|
|
if not schedule:
|
|
raise ValueError(f"Schedule {schedule_id} not found")
|
|
|
|
# Validate cron expression if being updated
|
|
if 'cron_expression' in updates:
|
|
is_valid, error_msg = self.validate_cron_expression(updates['cron_expression'])
|
|
if not is_valid:
|
|
raise ValueError(f"Invalid cron expression: {error_msg}")
|
|
# Recalculate next_run
|
|
if schedule.enabled or updates.get('enabled', False):
|
|
updates['next_run'] = self.calculate_next_run(updates['cron_expression'])
|
|
|
|
# Validate config_id if being updated
|
|
if 'config_id' in updates:
|
|
db_config = self.db.query(ScanConfig).filter_by(id=updates['config_id']).first()
|
|
if not db_config:
|
|
raise ValueError(f"Config with ID {updates['config_id']} not found")
|
|
|
|
# Handle enabled toggle
|
|
if 'enabled' in updates:
|
|
if updates['enabled'] and not schedule.enabled:
|
|
# Being enabled - calculate next_run
|
|
cron_expr = updates.get('cron_expression', schedule.cron_expression)
|
|
updates['next_run'] = self.calculate_next_run(cron_expr)
|
|
elif not updates['enabled'] and schedule.enabled:
|
|
# Being disabled - clear next_run
|
|
updates['next_run'] = None
|
|
|
|
# Update fields
|
|
for key, value in updates.items():
|
|
if hasattr(schedule, key):
|
|
setattr(schedule, key, value)
|
|
|
|
schedule.updated_at = datetime.now(timezone.utc)
|
|
|
|
self.db.commit()
|
|
self.db.refresh(schedule)
|
|
|
|
logger.info(f"Schedule {schedule_id} updated: {list(updates.keys())}")
|
|
|
|
return self._schedule_to_dict(schedule)
|
|
|
|
def delete_schedule(self, schedule_id: int) -> bool:
|
|
"""
|
|
Delete a schedule.
|
|
|
|
Note: Associated scans are NOT deleted (schedule_id becomes null).
|
|
|
|
Args:
|
|
schedule_id: Schedule ID
|
|
|
|
Returns:
|
|
True if deleted successfully
|
|
|
|
Raises:
|
|
ValueError: If schedule not found
|
|
"""
|
|
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
|
|
|
|
if not schedule:
|
|
raise ValueError(f"Schedule {schedule_id} not found")
|
|
|
|
schedule_name = schedule.name
|
|
|
|
self.db.delete(schedule)
|
|
self.db.commit()
|
|
|
|
logger.info(f"Schedule {schedule_id} ('{schedule_name}') deleted")
|
|
|
|
return True
|
|
|
|
def toggle_enabled(self, schedule_id: int, enabled: bool) -> Dict[str, Any]:
|
|
"""
|
|
Enable or disable a schedule.
|
|
|
|
Args:
|
|
schedule_id: Schedule ID
|
|
enabled: New enabled status
|
|
|
|
Returns:
|
|
Updated schedule dictionary
|
|
|
|
Raises:
|
|
ValueError: If schedule not found
|
|
"""
|
|
return self.update_schedule(schedule_id, enabled=enabled)
|
|
|
|
def update_run_times(
|
|
self,
|
|
schedule_id: int,
|
|
last_run: datetime,
|
|
next_run: datetime
|
|
) -> bool:
|
|
"""
|
|
Update last_run and next_run timestamps.
|
|
|
|
Called after each execution.
|
|
|
|
Args:
|
|
schedule_id: Schedule ID
|
|
last_run: Last execution time
|
|
next_run: Next scheduled execution time
|
|
|
|
Returns:
|
|
True if updated successfully
|
|
|
|
Raises:
|
|
ValueError: If schedule not found
|
|
"""
|
|
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
|
|
|
|
if not schedule:
|
|
raise ValueError(f"Schedule {schedule_id} not found")
|
|
|
|
schedule.last_run = last_run
|
|
schedule.next_run = next_run
|
|
schedule.updated_at = datetime.now(timezone.utc)
|
|
|
|
self.db.commit()
|
|
|
|
logger.debug(f"Schedule {schedule_id} run times updated: last={last_run}, next={next_run}")
|
|
|
|
return True
|
|
|
|
def validate_cron_expression(self, cron_expr: str) -> Tuple[bool, Optional[str]]:
|
|
"""
|
|
Validate a cron expression.
|
|
|
|
Args:
|
|
cron_expr: Cron expression to validate in standard crontab format
|
|
Format: minute hour day month day_of_week
|
|
Day of week: 0=Sunday, 1=Monday, ..., 6=Saturday
|
|
(APScheduler will convert this to its internal format automatically)
|
|
|
|
Returns:
|
|
Tuple of (is_valid, error_message)
|
|
- (True, None) if valid
|
|
- (False, error_message) if invalid
|
|
|
|
Note:
|
|
This validates using croniter which uses standard crontab format.
|
|
APScheduler's from_crontab() will handle the conversion when the
|
|
schedule is registered with the scheduler.
|
|
"""
|
|
try:
|
|
# Try to create a croniter instance
|
|
# croniter uses standard crontab format (Sunday=0)
|
|
from datetime import timezone
|
|
base_time = datetime.now(timezone.utc)
|
|
cron = croniter(cron_expr, base_time)
|
|
|
|
# Try to get the next run time (validates the expression)
|
|
cron.get_next(datetime)
|
|
|
|
# Validate basic format (5 fields)
|
|
fields = cron_expr.split()
|
|
if len(fields) != 5:
|
|
return (False, f"Cron expression must have 5 fields (minute hour day month day_of_week), got {len(fields)}")
|
|
|
|
return (True, None)
|
|
except (ValueError, KeyError) as e:
|
|
error_msg = str(e)
|
|
# Add helpful hint for day_of_week errors
|
|
if "day" in error_msg.lower() and len(cron_expr.split()) >= 5:
|
|
hint = "\nNote: Use standard crontab format where 0=Sunday, 1=Monday, ..., 6=Saturday"
|
|
return (False, f"{error_msg}{hint}")
|
|
return (False, str(e))
|
|
except Exception as e:
|
|
return (False, f"Unexpected error: {str(e)}")
|
|
|
|
def calculate_next_run(
|
|
self,
|
|
cron_expr: str,
|
|
from_time: Optional[datetime] = None
|
|
) -> datetime:
|
|
"""
|
|
Calculate next run time from cron expression.
|
|
|
|
Args:
|
|
cron_expr: Cron expression
|
|
from_time: Base time (defaults to now UTC)
|
|
|
|
Returns:
|
|
Next run datetime (UTC, timezone-aware)
|
|
|
|
Raises:
|
|
ValueError: If cron expression is invalid
|
|
"""
|
|
if from_time is None:
|
|
from_time = datetime.now(timezone.utc)
|
|
|
|
try:
|
|
cron = croniter(cron_expr, from_time)
|
|
next_run = cron.get_next(datetime)
|
|
|
|
# croniter returns naive datetime, so we need to add timezone info
|
|
# Since we're using UTC for all calculations, add UTC timezone
|
|
if next_run.tzinfo is None:
|
|
next_run = next_run.replace(tzinfo=timezone.utc)
|
|
|
|
return next_run
|
|
except Exception as e:
|
|
raise ValueError(f"Invalid cron expression '{cron_expr}': {str(e)}")
|
|
|
|
def get_schedule_history(
|
|
self,
|
|
schedule_id: int,
|
|
limit: int = 10
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Get recent scans triggered by this schedule.
|
|
|
|
Args:
|
|
schedule_id: Schedule ID
|
|
limit: Maximum number of scans to return
|
|
|
|
Returns:
|
|
List of scan dictionaries (recent first)
|
|
"""
|
|
scans = (
|
|
self.db.query(Scan)
|
|
.filter(Scan.schedule_id == schedule_id)
|
|
.order_by(Scan.timestamp.desc())
|
|
.limit(limit)
|
|
.all()
|
|
)
|
|
|
|
return [
|
|
{
|
|
'id': scan.id,
|
|
'timestamp': scan.timestamp.isoformat() if scan.timestamp else None,
|
|
'status': scan.status,
|
|
'title': scan.title,
|
|
'config_id': scan.config_id
|
|
}
|
|
for scan in scans
|
|
]
|
|
|
|
def _schedule_to_dict(self, schedule: Schedule) -> Dict[str, Any]:
|
|
"""
|
|
Convert Schedule model to dictionary.
|
|
|
|
Args:
|
|
schedule: Schedule model instance
|
|
|
|
Returns:
|
|
Dictionary representation
|
|
"""
|
|
# Get config title if relationship is loaded
|
|
config_name = None
|
|
if schedule.config:
|
|
config_name = schedule.config.title
|
|
|
|
return {
|
|
'id': schedule.id,
|
|
'name': schedule.name,
|
|
'config_id': schedule.config_id,
|
|
'config_name': config_name,
|
|
'cron_expression': schedule.cron_expression,
|
|
'enabled': schedule.enabled,
|
|
'last_run': schedule.last_run.isoformat() if schedule.last_run else None,
|
|
'next_run': schedule.next_run.isoformat() if schedule.next_run else None,
|
|
'next_run_relative': self._get_relative_time(schedule.next_run) if schedule.next_run else None,
|
|
'created_at': schedule.created_at.isoformat() if schedule.created_at else None,
|
|
'updated_at': schedule.updated_at.isoformat() if schedule.updated_at else None
|
|
}
|
|
|
|
def _get_relative_time(self, dt: Optional[datetime]) -> Optional[str]:
|
|
"""
|
|
Format datetime as relative time.
|
|
|
|
Args:
|
|
dt: Datetime to format (UTC, can be naive or aware)
|
|
|
|
Returns:
|
|
Human-readable relative time (e.g., "in 2 hours", "yesterday")
|
|
"""
|
|
if dt is None:
|
|
return None
|
|
|
|
# Ensure both datetimes are timezone-aware for comparison
|
|
now = datetime.now(timezone.utc)
|
|
|
|
# If dt is naive, assume it's UTC and add timezone info
|
|
if dt.tzinfo is None:
|
|
dt = dt.replace(tzinfo=timezone.utc)
|
|
|
|
diff = dt - now
|
|
|
|
# Future times
|
|
if diff.total_seconds() > 0:
|
|
seconds = int(diff.total_seconds())
|
|
|
|
if seconds < 60:
|
|
return "in less than a minute"
|
|
elif seconds < 3600:
|
|
minutes = seconds // 60
|
|
return f"in {minutes} minute{'s' if minutes != 1 else ''}"
|
|
elif seconds < 86400:
|
|
hours = seconds // 3600
|
|
return f"in {hours} hour{'s' if hours != 1 else ''}"
|
|
elif seconds < 604800:
|
|
days = seconds // 86400
|
|
return f"in {days} day{'s' if days != 1 else ''}"
|
|
else:
|
|
weeks = seconds // 604800
|
|
return f"in {weeks} week{'s' if weeks != 1 else ''}"
|
|
|
|
# Past times
|
|
else:
|
|
seconds = int(-diff.total_seconds())
|
|
|
|
if seconds < 60:
|
|
return "less than a minute ago"
|
|
elif seconds < 3600:
|
|
minutes = seconds // 60
|
|
return f"{minutes} minute{'s' if minutes != 1 else ''} ago"
|
|
elif seconds < 86400:
|
|
hours = seconds // 3600
|
|
return f"{hours} hour{'s' if hours != 1 else ''} ago"
|
|
elif seconds < 604800:
|
|
days = seconds // 86400
|
|
return f"{days} day{'s' if days != 1 else ''} ago"
|
|
else:
|
|
weeks = seconds // 604800
|
|
return f"{weeks} week{'s' if weeks != 1 else ''} ago"
|