Fix schedule management and update documentation for database-backed configs

This commit addresses multiple issues with schedule management and updates
  documentation to reflect the transition from YAML-based to database-backed
  configuration system.

  **Documentation Updates:**
  - Update DEPLOYMENT.md to remove all references to YAML config files
  - Document that all configurations are now stored in SQLite database
  - Update API examples to use config IDs instead of YAML filenames
  - Remove configs directory from backup/restore procedures
  - Update volume management section to reflect database-only storage

  **Cron Expression Handling:**
  - Add comprehensive documentation for APScheduler cron format conversion
  - Document that from_crontab() accepts standard format (Sunday=0) and converts automatically
  - Add validate_cron_expression() helper method with detailed error messages
  - Include helpful hints for day-of-week field errors in validation
  - Fix all deprecated datetime.utcnow() calls, replace with datetime.now(timezone.utc)

  **Timezone-Aware DateTime Fixes:**
  - Fix "can't subtract offset-naive and offset-aware datetimes" error
  - Add timezone awareness to croniter.get_next() return values
  - Make _get_relative_time() defensive to handle both naive and aware datetimes
  - Ensure all datetime comparisons use timezone-aware objects

  **Schedule Edit UI Fixes:**
  - Fix JavaScript error "Cannot set properties of null (setting 'value')"
  - Change reference from non-existent 'config-id' to correct 'config-file' element
  - Add config_name field to schedule API responses for better UX
  - Eagerly load Schedule.config relationship using joinedload()
  - Fix AttributeError: use schedule.config.title instead of .name
  - Display config title and ID in schedule edit form

  **Technical Details:**
  - app/web/services/schedule_service.py: 6 datetime.utcnow() fixes, validation enhancements
  - app/web/services/scheduler_service.py: Documentation, validation, timezone fixes
  - app/web/templates/schedule_edit.html: JavaScript element reference fix
  - docs/DEPLOYMENT.md: Complete rewrite of config management sections

  Fixes scheduling for Sunday at midnight (cron: 0 0 * * 0)
  Fixes schedule edit page JavaScript errors
  Improves user experience with config title display
This commit is contained in:
2025-11-24 12:53:06 -06:00
parent f24bd11dfd
commit 5e3a70f837
7 changed files with 276 additions and 139 deletions

View File

@@ -676,29 +676,57 @@ class SneakyScanner:
return services
def _is_likely_web_service(self, service: Dict) -> bool:
def _is_likely_web_service(self, service: Dict, ip: str = None) -> bool:
"""
Check if a service is likely HTTP/HTTPS based on nmap detection or common web ports
Check if a service is a web server by actually making an HTTP request
Args:
service: Service dictionary from nmap results
ip: IP address to test (required for HTTP probe)
Returns:
True if service appears to be web-related
True if service responds to HTTP/HTTPS requests
"""
# Check service name
import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Quick check for known web service names first
web_services = ['http', 'https', 'ssl', 'http-proxy', 'https-alt',
'http-alt', 'ssl/http', 'ssl/https']
service_name = service.get('service', '').lower()
if service_name in web_services:
return True
# Check common non-standard web ports
web_ports = [80, 443, 8000, 8006, 8008, 8080, 8081, 8443, 8888, 9443]
# If no IP provided, can't do HTTP probe
port = service.get('port')
if not ip or not port:
# check just the service if no IP - honestly shouldn't get here, but just incase...
if service_name in web_services:
return True
return False
return port in web_ports
# Actually try to connect - this is the definitive test
# Try HTTPS first, then HTTP
for protocol in ['https', 'http']:
url = f"{protocol}://{ip}:{port}/"
try:
response = requests.get(
url,
timeout=3,
verify=False,
allow_redirects=False
)
# Any status code means it's a web server
# (including 404, 500, etc. - still a web server)
return True
except requests.exceptions.SSLError:
# SSL error on HTTPS, try HTTP next
continue
except (requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
requests.exceptions.RequestException):
continue
return False
def _detect_http_https(self, ip: str, port: int, timeout: int = 5) -> str:
"""
@@ -886,7 +914,7 @@ class SneakyScanner:
ip_results = {}
for service in services:
if not self._is_likely_web_service(service):
if not self._is_likely_web_service(service, ip):
continue
port = service['port']

View File

@@ -6,7 +6,7 @@ scheduled scans with cron expressions.
"""
import logging
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
from croniter import croniter
@@ -71,6 +71,7 @@ class ScheduleService:
next_run = self.calculate_next_run(cron_expression) if enabled else None
# Create schedule record
now_utc = datetime.now(timezone.utc)
schedule = Schedule(
name=name,
config_id=config_id,
@@ -78,8 +79,8 @@ class ScheduleService:
enabled=enabled,
last_run=None,
next_run=next_run,
created_at=datetime.utcnow(),
updated_at=datetime.utcnow()
created_at=now_utc,
updated_at=now_utc
)
self.db.add(schedule)
@@ -103,7 +104,14 @@ class ScheduleService:
Raises:
ValueError: If schedule not found
"""
schedule = self.db.query(Schedule).filter(Schedule.id == schedule_id).first()
from sqlalchemy.orm import joinedload
schedule = (
self.db.query(Schedule)
.options(joinedload(Schedule.config))
.filter(Schedule.id == schedule_id)
.first()
)
if not schedule:
raise ValueError(f"Schedule {schedule_id} not found")
@@ -138,8 +146,10 @@ class ScheduleService:
'pages': int
}
"""
# Build query
query = self.db.query(Schedule)
from sqlalchemy.orm import joinedload
# Build query and eagerly load config relationship
query = self.db.query(Schedule).options(joinedload(Schedule.config))
# Apply filter
if enabled_filter is not None:
@@ -215,7 +225,7 @@ class ScheduleService:
if hasattr(schedule, key):
setattr(schedule, key, value)
schedule.updated_at = datetime.utcnow()
schedule.updated_at = datetime.now(timezone.utc)
self.db.commit()
self.db.refresh(schedule)
@@ -298,7 +308,7 @@ class ScheduleService:
schedule.last_run = last_run
schedule.next_run = next_run
schedule.updated_at = datetime.utcnow()
schedule.updated_at = datetime.now(timezone.utc)
self.db.commit()
@@ -311,23 +321,43 @@ class ScheduleService:
Validate a cron expression.
Args:
cron_expr: Cron expression to validate
cron_expr: Cron expression to validate in standard crontab format
Format: minute hour day month day_of_week
Day of week: 0=Sunday, 1=Monday, ..., 6=Saturday
(APScheduler will convert this to its internal format automatically)
Returns:
Tuple of (is_valid, error_message)
- (True, None) if valid
- (False, error_message) if invalid
Note:
This validates using croniter which uses standard crontab format.
APScheduler's from_crontab() will handle the conversion when the
schedule is registered with the scheduler.
"""
try:
# Try to create a croniter instance
base_time = datetime.utcnow()
# croniter uses standard crontab format (Sunday=0)
from datetime import timezone
base_time = datetime.now(timezone.utc)
cron = croniter(cron_expr, base_time)
# Try to get the next run time (validates the expression)
cron.get_next(datetime)
# Validate basic format (5 fields)
fields = cron_expr.split()
if len(fields) != 5:
return (False, f"Cron expression must have 5 fields (minute hour day month day_of_week), got {len(fields)}")
return (True, None)
except (ValueError, KeyError) as e:
error_msg = str(e)
# Add helpful hint for day_of_week errors
if "day" in error_msg.lower() and len(cron_expr.split()) >= 5:
hint = "\nNote: Use standard crontab format where 0=Sunday, 1=Monday, ..., 6=Saturday"
return (False, f"{error_msg}{hint}")
return (False, str(e))
except Exception as e:
return (False, f"Unexpected error: {str(e)}")
@@ -345,17 +375,24 @@ class ScheduleService:
from_time: Base time (defaults to now UTC)
Returns:
Next run datetime (UTC)
Next run datetime (UTC, timezone-aware)
Raises:
ValueError: If cron expression is invalid
"""
if from_time is None:
from_time = datetime.utcnow()
from_time = datetime.now(timezone.utc)
try:
cron = croniter(cron_expr, from_time)
return cron.get_next(datetime)
next_run = cron.get_next(datetime)
# croniter returns naive datetime, so we need to add timezone info
# Since we're using UTC for all calculations, add UTC timezone
if next_run.tzinfo is None:
next_run = next_run.replace(tzinfo=timezone.utc)
return next_run
except Exception as e:
raise ValueError(f"Invalid cron expression '{cron_expr}': {str(e)}")
@@ -403,10 +440,16 @@ class ScheduleService:
Returns:
Dictionary representation
"""
# Get config title if relationship is loaded
config_name = None
if schedule.config:
config_name = schedule.config.title
return {
'id': schedule.id,
'name': schedule.name,
'config_id': schedule.config_id,
'config_name': config_name,
'cron_expression': schedule.cron_expression,
'enabled': schedule.enabled,
'last_run': schedule.last_run.isoformat() if schedule.last_run else None,
@@ -421,7 +464,7 @@ class ScheduleService:
Format datetime as relative time.
Args:
dt: Datetime to format (UTC)
dt: Datetime to format (UTC, can be naive or aware)
Returns:
Human-readable relative time (e.g., "in 2 hours", "yesterday")
@@ -429,7 +472,13 @@ class ScheduleService:
if dt is None:
return None
now = datetime.utcnow()
# Ensure both datetimes are timezone-aware for comparison
now = datetime.now(timezone.utc)
# If dt is naive, assume it's UTC and add timezone info
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
diff = dt - now
# Future times

View File

@@ -149,6 +149,51 @@ class SchedulerService:
except Exception as e:
logger.error(f"Error loading schedules on startup: {str(e)}", exc_info=True)
@staticmethod
def validate_cron_expression(cron_expression: str) -> tuple[bool, str]:
"""
Validate a cron expression and provide helpful feedback.
Args:
cron_expression: Cron expression to validate
Returns:
Tuple of (is_valid: bool, message: str)
- If valid: (True, "Valid cron expression")
- If invalid: (False, "Error message with details")
Note:
Standard crontab format: minute hour day month day_of_week
Day of week: 0=Sunday, 1=Monday, ..., 6=Saturday (or 7=Sunday)
"""
from apscheduler.triggers.cron import CronTrigger
try:
# Try to parse the expression
trigger = CronTrigger.from_crontab(cron_expression)
# Validate basic format (5 fields)
fields = cron_expression.split()
if len(fields) != 5:
return False, f"Cron expression must have 5 fields (minute hour day month day_of_week), got {len(fields)}"
return True, "Valid cron expression"
except (ValueError, KeyError) as e:
error_msg = str(e)
# Provide helpful hints for common errors
if "day_of_week" in error_msg.lower() or (len(cron_expression.split()) >= 5):
# Check if day_of_week field might be using APScheduler format by mistake
fields = cron_expression.split()
if len(fields) == 5:
dow_field = fields[4]
if dow_field.isdigit() and int(dow_field) >= 0:
hint = "\nNote: Use standard crontab format where 0=Sunday, 1=Monday, ..., 6=Saturday"
return False, f"Invalid cron expression: {error_msg}{hint}"
return False, f"Invalid cron expression: {error_msg}"
def queue_scan(self, scan_id: int, config_id: int) -> str:
"""
Queue a scan for immediate background execution.
@@ -188,6 +233,10 @@ class SchedulerService:
schedule_id: Database ID of the schedule
config_id: Database config ID
cron_expression: Cron expression (e.g., "0 2 * * *" for 2am daily)
IMPORTANT: Use standard crontab format where:
- Day of week: 0 = Sunday, 1 = Monday, ..., 6 = Saturday
- APScheduler automatically converts to its internal format
- from_crontab() handles the conversion properly
Returns:
Job ID from APScheduler
@@ -195,18 +244,29 @@ class SchedulerService:
Raises:
RuntimeError: If scheduler not initialized
ValueError: If cron expression is invalid
Note:
APScheduler internally uses Monday=0, but from_crontab() accepts
standard crontab format (Sunday=0) and converts it automatically.
"""
if not self.scheduler:
raise RuntimeError("Scheduler not initialized. Call init_scheduler() first.")
from apscheduler.triggers.cron import CronTrigger
# Validate cron expression first to provide helpful error messages
is_valid, message = self.validate_cron_expression(cron_expression)
if not is_valid:
raise ValueError(message)
# Create cron trigger from expression using local timezone
# This allows users to specify times in their local timezone
# from_crontab() parses standard crontab format (Sunday=0)
# and converts to APScheduler's internal format (Monday=0) automatically
try:
trigger = CronTrigger.from_crontab(cron_expression)
# timezone defaults to local system timezone
except (ValueError, KeyError) as e:
# This should not happen due to validation above, but catch anyway
raise ValueError(f"Invalid cron expression '{cron_expression}': {str(e)}")
# Add cron job
@@ -294,11 +354,16 @@ class SchedulerService:
# Update schedule's last_run and next_run
from croniter import croniter
next_run = croniter(schedule['cron_expression'], datetime.utcnow()).get_next(datetime)
now_utc = datetime.now(timezone.utc)
next_run = croniter(schedule['cron_expression'], now_utc).get_next(datetime)
# croniter returns naive datetime, add UTC timezone
if next_run.tzinfo is None:
next_run = next_run.replace(tzinfo=timezone.utc)
schedule_service.update_run_times(
schedule_id=schedule_id,
last_run=datetime.utcnow(),
last_run=now_utc,
next_run=next_run
)

View File

@@ -298,7 +298,11 @@ async function loadSchedule() {
function populateForm(schedule) {
document.getElementById('schedule-id').value = schedule.id;
document.getElementById('schedule-name').value = schedule.name;
document.getElementById('config-id').value = schedule.config_id;
// Display config name and ID in the readonly config-file field
const configDisplay = schedule.config_name
? `${schedule.config_name} (ID: ${schedule.config_id})`
: `Config ID: ${schedule.config_id}`;
document.getElementById('config-file').value = configDisplay;
document.getElementById('cron-expression').value = schedule.cron_expression;
document.getElementById('schedule-enabled').checked = schedule.enabled;