feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across
`form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
- `app/utils/urltools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox
Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
This commit is contained in:
51
app/rules/factory.py
Normal file
51
app/rules/factory.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# app/rules/factory.py
|
||||
from pathlib import Path
|
||||
|
||||
from app.logging_setup import get_engine_logger
|
||||
from app.rules.rules_engine import RuleEngine
|
||||
from app.rules.rules_engine import Rule
|
||||
from app.rules.function_rules import FunctionRuleAdapter
|
||||
from app.rules.function_rules import (
|
||||
form_action_missing, form_http_on_https_page, form_submits_to_different_host,
|
||||
script_src_uses_data_or_blob, script_src_has_dangerous_extension, script_third_party_host,
|
||||
)
|
||||
|
||||
from app.rules.rules_engine import load_rules_from_yaml
|
||||
|
||||
# Directory two levels above this file (the parent of the folder containing
# this module), resolved to an absolute path.
base_dir = Path(__file__).resolve().parent.parent

# Location of the YAML rule definitions consumed by build_rules_engine().
RULES_FILE_PATH = base_dir.joinpath("config", "suspicious_rules.yaml")

# Engine logger obtained from app.logging_setup; used for build-time messages.
log = get_engine_logger()
|
||||
|
||||
def build_rules_engine() -> RuleEngine:
    """Construct a new RuleEngine populated with YAML and function rules.

    Steps, in order:
      1. Load rules from ``RULES_FILE_PATH`` via ``load_rules_from_yaml`` and
         add each one to the engine.
      2. Wrap each built-in ``form_*`` / ``script_*`` check in a
         ``FunctionRuleAdapter`` (sharing one ``FactAdapter``) and add it as a
         ``Rule`` of type ``"function"``.

    Two info-level log lines are emitted: the number of YAML rules loaded and
    the total number of rules on the engine once registration is finished.

    Returns:
        The newly built ``RuleEngine``. This function only builds and returns
        the instance; it does not store it anywhere itself.
    """
    engine = RuleEngine()

    # --- 1) Rules declared in the YAML file -------------------------------
    loaded = load_rules_from_yaml(RULES_FILE_PATH)
    for yaml_rule in loaded:
        engine.add_rule(yaml_rule)
    log.info(
        "Found %d suspicious rules from %s",
        len(loaded),
        # Falls back to the literal "config" when the loader's return value
        # carries no ``source_path`` attribute.
        getattr(loaded, "source_path", "config"),
    )

    # --- 2) Rules implemented as Python functions -------------------------
    # Imported here (not at module top), matching the original placement.
    from app.rules.function_rules import FactAdapter

    fact_adapter = FactAdapter()

    # (rule name, description, category, check function); registration order
    # follows the order of this table.
    function_rule_specs = (
        ("form_action_missing", "Form has no action attribute", "form", form_action_missing),
        ("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", form_http_on_https_page),
        ("form_submits_to_different_host", "Form submits to a different host", "form", form_submits_to_different_host),
        ("script_src_uses_data_or_blob", "Script src uses data:/blob: URL", "script", script_src_uses_data_or_blob),
        ("script_src_has_dangerous_extension", "External script with dangerous extension", "script", script_src_has_dangerous_extension),
        ("script_third_party_host", "Script is from a third-party host", "script", script_third_party_host),
    )

    for rule_name, description, category, check in function_rule_specs:
        wrapped_check = FunctionRuleAdapter(
            check,
            category=category,
            adapter=fact_adapter,
            rule_name=rule_name,
        )
        engine.add_rule(Rule(rule_name, description, category, "function", wrapped_check))

    log.info("Registered %d total rules (YAML + function)", len(engine.rules))
    return engine
|
||||
Reference in New Issue
Block a user