Files
SneakyScope/app/utils/settings.py
Phillip Tarrant 693f7d67b9 feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
  - crt.sh lookup with expired-filtering and root-domain wildcard resolution
  - live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data

- Introduce `URLNormalizer` with punycode support and typo repair
  - Auto-prepend `https://` for bare domains (e.g., `google.com`)
  - Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
  - `get_url_normalizer()` reads defaults from Settings (if present)

- Standardize function-rule return shape to `(bool, dict|None)` across
  `form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
  - Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
  - Adapt non-dict inputs to facts (category-aware and via provided adapter)
  - Return `(True, dict)` on match, `(False, None)` on miss
  - Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
  - Back rules by private `self._rules`; `rules` property returns copy
  - Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
  - Fix AttributeError from property assignment during `__init__`

- Replace hidden singleton factory with explicit builder + global state:
  - `app/rules/factory.py::build_rules_engine()` builds and logs totals
  - `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the SOF
  - `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts

- Unify logging wiring:
  - `wire_logging_once(app)` clears and attaches a single handler chain
  - Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
  - Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)

- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)

- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section

- Consumers should import the rules engine via:
  - `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
  then publish with `set_rules_engine()`. Do not call old singleton factories.

- New/changed modules (high level):
  - `app/utils/urltools.py` (+) — URLNormalizer + `get_url_normalizer()`
  - `app/rules/function_rules.py` (±) — normalized payload returns
  - `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
  - `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
  - `app/rules/factory.py` (±) — pure builder; totals logged post-registration
  - `app/state.py` (+) — process-global rules engine
  - `app/logging_setup.py` (±) — single chain, two named loggers
  - `app/wsgi.py` (±) — preload build + `set_rules_engine()`
  - `entrypoint.sh` (±) — add `--preload`
  - templates (±) — TLS card, raw toggle; front-page checkbox

Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
2025-08-21 22:05:16 -05:00

159 lines
4.8 KiB
Python

#
# Note: the settings file path is hardcoded in this module (DEFAULT_SETTINGS_FILE), just after the imports.
#
# To make a new settings section, just add the setting dict to your yaml
# and then define the data class below in the config data classes area.
#
# Example use from anywhere - get_settings() always returns the same singleton:
#
#     from settings import get_settings
#
#     def main():
#         settings = get_settings()
#         print(settings.app.name)  # Autocomplete works
#         print(settings.cache.whois_cache_days)
#
#     if __name__ == "__main__":
#         main()
import functools
from pathlib import Path
from typing import Any, Callable, TypeVar
from dataclasses import dataclass, fields, is_dataclass, field, MISSING
import logging
import sys
# Name the logger after the module rather than the file path: dots inside a
# filesystem path would be interpreted as logger-hierarchy separators.
logger = logging.getLogger(__name__)

# PyYAML is a hard requirement for this module; fail fast with a readable
# message instead of a raw traceback when it is missing.
try:
    import yaml
except ModuleNotFoundError:
    msg = (
        "Required modules are not installed. "
        "Can not continue with module / application loading.\n"
        "Install it with: pip install -r requirements"
    )
    print(msg, file=sys.stderr)
    logger.error(msg)
    # sys.exit() is always available (the bare exit() helper is only injected
    # by the `site` module) and a non-zero status reports the failure to the
    # calling process.
    sys.exit(1)

# Directory two levels above this file; the default settings file is
# resolved relative to it, not to the current working directory.
BASE_DIR = Path(__file__).resolve().parent.parent
DEFAULT_SETTINGS_FILE = BASE_DIR / "config" / "settings.yaml"
# ---------- CONFIG DATA CLASSES ----------
@dataclass
class External_FetchConfig:
    """Default values for the ``external_fetch`` settings section."""

    # Master switch for the section.
    enabled: bool = True

    # NOTE(review): the limits below look like caps on size (MB), time (ms),
    # redirect hops and parallelism -- confirm against the consumer.
    max_total_mb: int = 5
    max_time_ms: int = 3000
    max_redirects: int = 3
    concurrency: int = 3
@dataclass
class UIConfig:
    """Default values for the ``ui`` settings section."""

    # NOTE(review): presumably the character length of snippet previews --
    # confirm against the templates that read it.
    snippet_preview_len: int = 160
@dataclass
class Cache_Config:
    """Default values for the ``cache`` settings section."""

    # NOTE(review): presumably retention periods, in days, for cached
    # WHOIS / GeoIP lookups -- confirm against the cache consumers.
    whois_cache_days: int = 7
    geoip_cache_days: int = 7

    # NOTE(review): presumably how many recent runs are kept or listed.
    recent_runs_count: int = 10
@dataclass
class AppConfig:
    """Default values for the ``app`` settings section."""

    # Application name and two-part version number.
    name: str = "MyApp"
    version_major: int = 1
    version_minor: int = 0

    # NOTE(review): presumably toggles printing/logging of rule loading --
    # confirm against the rules loader.
    print_rule_loads: bool = False
@dataclass
class Settings:
    """Top-level settings object with one nested dataclass per YAML section.

    Each field name is looked up as a top-level key of the YAML document;
    sections or keys that are absent keep the defaults declared on the
    section dataclasses.
    """

    cache: Cache_Config = field(default_factory=Cache_Config)
    ui: UIConfig = field(default_factory=UIConfig)
    external_fetch: External_FetchConfig = field(default_factory=External_FetchConfig)
    app: AppConfig = field(default_factory=AppConfig)

    @classmethod
    def from_yaml(cls, path: str | Path) -> "Settings":
        """Load settings from a YAML file into a Settings object.

        Args:
            path: Location of the YAML settings file.

        Returns:
            A Settings instance in which values found in the file override
            the dataclass defaults. If the file does not exist, a warning is
            logged and an all-defaults instance is returned.

        Raises:
            ValueError: If the top level of the YAML document is not a mapping.
            TypeError: If a section contains a key that is not a field of the
                matching section dataclass (raised by its constructor).

        Any other error from opening or parsing the file propagates unchanged.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                # An empty document parses to None; treat it as "no overrides".
                raw: dict[str, Any] = yaml.safe_load(f) or {}
        except FileNotFoundError:
            logger.warning("Settings file %s not found! Using default settings.", path)
            raw = {}

        # A list or scalar at the root cannot be mapped onto sections; report
        # that explicitly instead of failing later on `raw.get`.
        if not isinstance(raw, dict):
            raise ValueError(
                f"Settings file {path} must contain a mapping at the top level, "
                f"got {type(raw).__name__}."
            )

        init_kwargs: dict[str, Any] = {}
        for f_def in fields(cls):
            yaml_value = raw.get(f_def.name)

            # Determine the default value from default_factory or default.
            if f_def.default_factory is not MISSING:
                default_value = f_def.default_factory()
            elif f_def.default is not MISSING:
                default_value = f_def.default
            else:
                default_value = None

            if not is_dataclass(f_def.type):
                # Plain field: the YAML value wins unless it is missing/null.
                init_kwargs[f_def.name] = (
                    yaml_value if yaml_value is not None else default_value
                )
            elif isinstance(yaml_value, dict):
                # Nested section: start from the section defaults and overlay
                # whatever keys the YAML file provides.
                merged_data = {
                    fld.name: getattr(default_value, fld.name)
                    for fld in fields(f_def.type)
                }
                merged_data.update(yaml_value)
                init_kwargs[f_def.name] = f_def.type(**merged_data)
            else:
                # Section absent (or not a mapping): keep the defaults.
                init_kwargs[f_def.name] = default_value

        return cls(**init_kwargs)
# ---------- SINGLETON DECORATOR ----------
T = TypeVar("T")


def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
    """Ensure the decorated function only runs once, returning the cached value.

    The first successful call executes ``func`` with the arguments given and
    stores its result; every later call returns that stored result and
    ignores its own arguments. If ``func`` raises, nothing is cached and the
    next call runs it again.

    Args:
        func: The loader function to wrap.

    Returns:
        A wrapper with the metadata of ``func`` that yields the cached result.
    """
    # Private to this decorated function; keyed by name so that a stored
    # ``None`` result still counts as "already loaded".
    cache: dict[str, T] = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        if func.__name__ not in cache:
            cache[func.__name__] = func(*args, **kwargs)
        return cache[func.__name__]

    return wrapper
@singleton_loader
def get_settings(config_path: str | Path | None = None) -> Settings:
    """Return the cached Settings instance, loading it on the first call.

    Because of ``@singleton_loader``, only the first call actually reads the
    file; later calls return the same object and their ``config_path`` is
    ignored.

    Args:
        config_path: Optional path to the YAML config file. When omitted,
            ``DEFAULT_SETTINGS_FILE`` is used.

    Returns:
        The Settings object produced by ``Settings.from_yaml``.
    """
    resolved_path = DEFAULT_SETTINGS_FILE if config_path is None else Path(config_path)
    return Settings.from_yaml(resolved_path)