feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across
`form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
- `app/utils/urltools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox
Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
This commit is contained in:
51
app/rules/factory.py
Normal file
51
app/rules/factory.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# app/rules/factory.py
|
||||
from pathlib import Path
|
||||
|
||||
from app.logging_setup import get_engine_logger
|
||||
from app.rules.rules_engine import RuleEngine
|
||||
from app.rules.rules_engine import Rule
|
||||
from app.rules.function_rules import FunctionRuleAdapter
|
||||
from app.rules.function_rules import (
|
||||
form_action_missing, form_http_on_https_page, form_submits_to_different_host,
|
||||
script_src_uses_data_or_blob, script_src_has_dangerous_extension, script_third_party_host,
|
||||
)
|
||||
|
||||
from app.rules.rules_engine import load_rules_from_yaml
|
||||
|
||||
# Two levels up from this file: the directory that contains the rules package.
base_dir = Path(__file__).resolve().parents[1]

# Location of the YAML rule definitions under <base_dir>/config.
RULES_FILE_PATH = base_dir.joinpath("config", "suspicious_rules.yaml")

# Engine logger used for build-time messages in this module.
log = get_engine_logger()
|
||||
|
||||
def build_rules_engine() -> RuleEngine:
    """
    Build a new RuleEngine populated with YAML rules and function rules.

    The YAML rules are loaded from RULES_FILE_PATH and added first; the
    built-in form/script function rules are then registered, each wrapped
    in a FunctionRuleAdapter that shares a single FactAdapter instance.
    The YAML rule count and the final registered total are logged.

    Returns:
        RuleEngine: the newly constructed engine.
    """
    # Function-scope import, kept where the original placed it.
    from app.rules.function_rules import FactAdapter

    eng = RuleEngine()

    # 1) YAML rules
    yaml_rules = load_rules_from_yaml(RULES_FILE_PATH)
    for yaml_rule in yaml_rules:
        eng.add_rule(yaml_rule)
    # The loader returns a plain list (no 'source_path' attribute), so log
    # the path that was actually read instead of a constant placeholder.
    log.info("Found %d suspicious rules from %s", len(yaml_rules), RULES_FILE_PATH)

    # 2) Function rules
    adapter = FactAdapter()

    # (rule name, description, category, rule function)
    function_rules = (
        ("form_action_missing", "Form has no action attribute", "form", form_action_missing),
        ("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", form_http_on_https_page),
        ("form_submits_to_different_host", "Form submits to a different host", "form", form_submits_to_different_host),
        ("script_src_uses_data_or_blob", "Script src uses data:/blob: URL", "script", script_src_uses_data_or_blob),
        ("script_src_has_dangerous_extension", "External script with dangerous extension", "script", script_src_has_dangerous_extension),
        ("script_third_party_host", "Script is from a third-party host", "script", script_third_party_host),
    )

    for name, description, category, fn in function_rules:
        wrapped = FunctionRuleAdapter(fn, category=category, adapter=adapter, rule_name=name)
        eng.add_rule(
            Rule(
                name=name,
                description=description,
                category=category,
                rule_type="function",
                # Must be a keyword argument: the fifth positional field of
                # Rule is 'pattern', so passing the adapter positionally
                # leaves 'function' as None and the rule can never run.
                function=wrapped,
            )
        )

    log.info("Registered %d total rules (YAML + function)", len(eng.rules))
    return eng
|
||||
@@ -19,10 +19,16 @@ Note:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
import inspect
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}
|
||||
from app.logging_setup import get_app_logger
|
||||
|
||||
app_logger = get_app_logger()
|
||||
|
||||
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:", "about:blank"}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapters
|
||||
@@ -36,9 +42,6 @@ class FactAdapter:
|
||||
You can expand the per-category parsers over time as needed.
|
||||
"""
|
||||
|
||||
def __init__(self, logger: Optional[Any] = None) -> None:
|
||||
self.logger = logger
|
||||
|
||||
def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
|
||||
"""
|
||||
Adapt text_or_facts (str or dict) into a facts dict.
|
||||
@@ -65,13 +68,11 @@ class FactAdapter:
|
||||
elif category == "text":
|
||||
return {"category": "text", "raw": text_or_facts}
|
||||
else:
|
||||
if self.logger:
|
||||
self.logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
|
||||
app_logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
|
||||
return {"category": category, "raw": text_or_facts}
|
||||
|
||||
# Fallback for unrecognized input types
|
||||
if self.logger:
|
||||
self.logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
|
||||
app_logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
|
||||
return {"category": category, "raw": text_or_facts}
|
||||
|
||||
# ---- Per-category parsers ----
|
||||
@@ -109,23 +110,149 @@ class FactAdapter:
|
||||
|
||||
class FunctionRuleAdapter:
    """
    Wraps a function-based rule so it ALWAYS returns:
      - match:    (True, Dict[str, Any])
      - no match: (False, None)

    Also adapts non-dict inputs into facts via a provided 'adapter' using a
    duck-typed protocol, so callers can pass raw items (e.g., strings/nodes).
    """

    def __init__(
        self,
        fn: Callable[[Dict[str, Any]], Any],
        category: str,
        adapter: Optional[Any] = None,
        rule_name: Optional[str] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Args:
            fn: Rule function that takes a facts dict.
            category: Category passed to the adapter when building facts.
            adapter: Optional object used to turn non-dict input into facts.
            rule_name: Name used in log messages; defaults to fn.__name__.
            logger: Logger for diagnostics; the module-level app logger is
                used when omitted.
        """
        self.fn = fn
        self.category = category
        self.adapter = adapter
        self.rule_name = rule_name or getattr(fn, "__name__", "<anonymous>")
        # Honour the caller-supplied logger instead of silently ignoring it.
        self.logger = logger if logger is not None else app_logger

    # ---------- helpers ----------

    def _adapt_to_facts(self, raw: Any) -> Optional[Dict[str, Any]]:
        """
        Convert whatever the engine passed into a facts dict.

        A dict is returned unchanged. Otherwise the adapter (if any) is
        probed, in order, for:
          1. build_facts / facts / to_facts, called as (category, raw)
          2. build_<category>_facts / <category>_facts, called as (raw)
          3. adapt, called as (raw, category=category)

        The first call that returns a dict wins. Exceptions raised by an
        adapter method are logged and the next candidate is tried.

        Returns:
            The facts dict, or None if the input could not be adapted.
        """
        # Already a dict? Use it.
        if isinstance(raw, dict):
            return raw

        if self.adapter is None:
            return None

        # (method name, positional args, keyword args), in priority order.
        attempts = []
        for meth in ("build_facts", "facts", "to_facts"):
            attempts.append((meth, (self.category, raw), {}))
        for meth in (f"build_{self.category}_facts", f"{self.category}_facts"):
            attempts.append((meth, (raw,), {}))
        # Last resort: the adapt(text_or_facts, category=...) signature that
        # FactAdapter in this module defines.
        attempts.append(("adapt", (raw,), {"category": self.category}))

        for meth, args, kwargs in attempts:
            candidate = getattr(self.adapter, meth, None)
            if not callable(candidate):
                continue
            try:
                facts = candidate(*args, **kwargs)
            except Exception as exc:
                self.logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc)
                continue
            if isinstance(facts, dict):
                return facts

        # No way to adapt
        return None

    def _coerce_return(self, outcome: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
        """
        Normalize rule function returns.

        Accepted shapes:
            (bool, dict|None)
            (bool, str)        -> dict {'note': str} on match
            (bool,) or bool    -> (True, {}) or (False, None)

        A match whose payload is neither None, dict nor str is still a
        match, with a minimal payload describing the value. Any other
        shape is logged and treated as no-match.
        """
        # Exact 2-tuple
        if isinstance(outcome, tuple) and len(outcome) == 2:
            matched = bool(outcome[0])
            raw = outcome[1]

            if not matched:
                return False, None
            if raw is None:
                return True, {}  # match with empty payload is fine
            if isinstance(raw, dict):
                return True, raw
            if isinstance(raw, str):
                return True, {"note": raw}

            self.logger.warning("[Rule] '%s' returned payload of invalid type: %s",
                                self.rule_name, type(raw).__name__)
            # Still treat as match but give minimal payload
            return True, {"note": "coerced-invalid-payload", "value_repr": repr(raw)}

        # Legacy: (bool,) or bare bool
        if isinstance(outcome, tuple) and len(outcome) == 1 and isinstance(outcome[0], bool):
            return (True, {}) if outcome[0] else (False, None)
        if isinstance(outcome, bool):
            return (True, {}) if outcome else (False, None)

        # Junk -> no match
        self.logger.warning("[Rule] '%s' returned invalid shape: %s",
                            self.rule_name, type(outcome).__name__)
        return False, None

    # ---------- callable ----------

    def __call__(self, raw: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
        """
        Apply the wrapped rule to the provided item (raw or facts).

        Input that cannot be adapted to a facts dict, and any exception
        raised by the rule function, are logged and reported as a miss.

        Returns:
            (True, dict) on match
            (False, None) on no match
        """
        facts = self._adapt_to_facts(raw)
        if facts is None:
            self.logger.warning("[Rule] '%s' received non-dict facts (%s). Coercing to miss.",
                                self.rule_name, type(raw).__name__)
            return False, None

        try:
            outcome = self.fn(facts)
        except Exception as exc:
            self.logger.exception("[Rule] '%s' raised: %s", self.rule_name, exc)
            return False, None

        return self._coerce_return(outcome)
|
||||
|
||||
|
||||
def _hit(payload: Optional[Dict[str, Any]] = None) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||
"""
|
||||
Standardize a positive match result: (True, dict)
|
||||
"""
|
||||
if payload is None:
|
||||
payload = {}
|
||||
return True, payload
|
||||
|
||||
|
||||
def _miss() -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||
"""
|
||||
Standardize a negative match result: (False, None)
|
||||
"""
|
||||
return False, None
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Function-based rules (dict 'facts' expected)
|
||||
@@ -133,20 +260,25 @@ class FunctionRuleAdapter:
|
||||
|
||||
# ---------------- Script rules ----------------
|
||||
|
||||
def script_src_uses_data_or_blob(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Flags <script> tags whose src is a 'data:' or 'blob:' URL.

    Reads facts['src']. On a match the payload carries the detected scheme
    (lowercase), the src exactly as given, and a human-readable note.
    """
    src = facts.get("src") or ""
    if not isinstance(src, str):
        return _miss()

    # URI schemes are case-insensitive, so 'DATA:' must be caught as well;
    # surrounding whitespace is dropped so a padded src cannot slip past.
    normalized = src.strip().lower()
    for scheme in ("data", "blob"):
        if normalized.startswith(scheme + ":"):
            return _hit({
                "scheme": scheme,
                "src": src,
                "note": f"Script src uses {scheme}: URL",
            })
    return _miss()
|
||||
|
||||
|
||||
def script_src_has_dangerous_extension(facts: Dict[str, Any]):
|
||||
def script_src_has_dangerous_extension(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||
"""Flags <script> tags with dangerous file extensions (e.g., .vbs, .hta)."""
|
||||
src = facts.get("src") or ""
|
||||
if not isinstance(src, str):
|
||||
return False, None
|
||||
return _miss()
|
||||
|
||||
low = src.lower()
|
||||
dangerous = (".vbs", ".hta")
|
||||
i = 0
|
||||
@@ -154,31 +286,43 @@ def script_src_has_dangerous_extension(facts: Dict[str, Any]):
|
||||
while i < m:
|
||||
ext = dangerous[i]
|
||||
if low.endswith(ext):
|
||||
return True, f"External script has dangerous extension ({ext})"
|
||||
return _hit({
|
||||
"ext": ext,
|
||||
"src": src,
|
||||
"note": f"External script has dangerous extension ({ext})"
|
||||
})
|
||||
i = i + 1
|
||||
return False, None
|
||||
|
||||
return _miss()
|
||||
|
||||
|
||||
def script_third_party_host(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Flags scripts loaded from a different hostname than the page.

    Reads facts['base_hostname'] and facts['src_hostname']. Both must be
    non-empty for the rule to fire. Hostnames are compared after trimming
    and lowercasing, the same normalization form_submits_to_different_host
    applies to the page host, so 'CDN.example.com' and 'cdn.example.com'
    are treated as the same host.
    """
    base_host = (facts.get("base_hostname") or "").strip().lower()
    src_host = (facts.get("src_hostname") or "").strip().lower()

    if not base_host or not src_host or base_host == src_host:
        return _miss()

    return _hit({
        "base_host": base_host,
        "src_host": src_host,
        "note": f"Third-party script host: {src_host}",
    })
|
||||
|
||||
|
||||
# ---------------- Form rules ----------------
|
||||
|
||||
def form_action_missing(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Flags <form> elements whose action is absent or a known no-op value.

    The trimmed facts['action'] is looked up in _NOOP_ACTIONS; on a hit the
    payload carries that trimmed action and a note.
    """
    raw_action = facts.get("action")
    action = (raw_action if raw_action else "").strip()

    # Guard clause: anything outside the no-op set is a real action.
    if action not in _NOOP_ACTIONS:
        return _miss()

    details = {
        "action": action,
        "note": "Form has no action attribute (or uses a no-op action)",
    }
    return _hit(details)
|
||||
|
||||
|
||||
def form_http_on_https_page(facts: Dict[str, Any]):
|
||||
def form_http_on_https_page(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||
"""Flags forms submitting over HTTP while the page was loaded over HTTPS."""
|
||||
base_url = (facts.get("base_url") or "").strip()
|
||||
action = (facts.get("action") or "").strip()
|
||||
@@ -188,30 +332,38 @@ def form_http_on_https_page(facts: Dict[str, Any]):
|
||||
parsed_act = urlparse(action)
|
||||
act_scheme = (parsed_act.scheme or "").lower()
|
||||
except Exception:
|
||||
return False, None # parsing trouble → don’t flag
|
||||
return _miss() # parsing trouble → don’t flag
|
||||
|
||||
# Only flag absolute http:// actions on https pages.
|
||||
# Relative or schemeless ('//host/...') isn’t flagged here (it won’t be HTTP on an HTTPS page).
|
||||
if base_scheme == "https" and act_scheme == "http":
|
||||
return True, f"Submits over insecure HTTP (action={parsed_act.geturl()})"
|
||||
return False, None
|
||||
return _hit({
|
||||
"base_url": base_url,
|
||||
"action": parsed_act.geturl(),
|
||||
"note": "Submits over insecure HTTP"
|
||||
})
|
||||
return _miss()
|
||||
|
||||
|
||||
def form_submits_to_different_host(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Flags <form> actions whose hostname differs from the page hostname.

    Reads facts['base_hostname'] (trimmed, lowercased) and facts['action']
    (trimmed). Empty or no-op actions, unparseable actions, and actions
    without a hostname are never flagged.
    """
    page_host = (facts.get("base_hostname") or "").strip().lower()
    action = (facts.get("action") or "").strip()

    if action == "" or action in _NOOP_ACTIONS:
        return _miss()

    try:
        target_host = (urlparse(action).hostname or "").lower()
    except Exception:
        return _miss()

    # A comparison is only meaningful when both hosts are known
    # (absolute URL or schemeless //host/path on the action side).
    both_known = bool(target_host) and bool(page_host)
    if not both_known or target_host == page_host:
        return _miss()

    return _hit({
        "base_host": page_host,
        "act_host": target_host,
        "action": action,
        "note": "Submits to a different host",
    })
|
||||
352
app/rules/rules_engine.py
Normal file
352
app/rules/rules_engine.py
Normal file
@@ -0,0 +1,352 @@
|
||||
"""
|
||||
rules_engine.py
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from dataclasses import dataclass, asdict, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from app.logging_setup import get_engine_logger
|
||||
from app.utils.settings import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
import yaml
|
||||
|
||||
try:
|
||||
# Flask is optional; engine still works without it.
|
||||
from flask import current_app, has_app_context
|
||||
except Exception:
|
||||
current_app = None # type: ignore
|
||||
def has_app_context() -> bool: # type: ignore
|
||||
return False
|
||||
|
||||
logger = get_engine_logger()
|
||||
|
||||
@dataclass
class Rule:
    """
    Represents a single detection rule.

    When rule_type == 'regex', 'pattern' must be provided.
    When rule_type == 'function', 'function' must be provided. It is called
    with the item under test and must return a 2-tuple whose first element
    is a bool and whose second element is either a reason string or a
    payload (dict or None).
    """
    name: str
    description: str
    category: str
    rule_type: str = "regex"
    pattern: Optional[str] = None
    function: Optional[Callable[..., Tuple[bool, Union[str, Dict, None]]]] = None
    severity: Optional[str] = None  # 'low' | 'medium' | 'high' (optional)
    tags: Optional[List[str]] = field(default=None)  # e.g., ['obfuscation', 'phishing'] (optional)

    # Internal compiled regex cache (not serialized)
    _compiled_regex: Optional[re.Pattern] = field(default=None, repr=False, compare=False)

    def compile_if_needed(self) -> bool:
        """
        Compile the regex pattern (case-insensitive) and cache it.

        Returns:
            bool: True if the regex is compiled and ready, False otherwise
            (not a regex rule, no pattern, or the pattern failed to compile).
        """
        if self.rule_type == "regex" and self.pattern:
            try:
                self._compiled_regex = re.compile(self.pattern, re.IGNORECASE)
                logger.debug(f"[Rule] Compiled regex for '{self.name}'")
                return True
            except re.error as rex:
                self._compiled_regex = None
                logger.warning(f"[Rule] Failed to compile regex for '{self.name}': {rex}")
                return False
        return False

    def run(self, text: str) -> Tuple[bool, str]:
        """
        Run the rule on the given text.

        Returns:
            (matched: bool, reason: str). Misconfigured rules and rule
            functions that raise are reported as (False, <explanation>).
        """
        if self.rule_type == "regex":
            return self._run_regex(text)
        if self.rule_type == "function":
            return self._run_function(text)

        logger.warning(f"[Rule] '{self.name}' has unknown type '{self.rule_type}'.")
        return False, f"Invalid rule configuration: unknown type '{self.rule_type}'"

    def _run_regex(self, text: str) -> Tuple[bool, str]:
        """Evaluate a regex rule, compiling the pattern on first use."""
        if not self.pattern:
            logger.warning(f"[Rule] '{self.name}' missing regex pattern.")
            return False, "Invalid rule configuration: missing pattern"

        if self._compiled_regex is None and not self.compile_if_needed():
            return False, f"Invalid regex pattern: {self.pattern!r}"

        if self._compiled_regex and self._compiled_regex.search(text):
            return True, f"Matched regex '{self.pattern}' → {self.description}"
        return False, "No match"

    def _run_function(self, text: str) -> Tuple[bool, str]:
        """Evaluate a function rule and reduce its result to (bool, str)."""
        if not callable(self.function):
            logger.warning(f"[Rule] '{self.name}' has invalid function configuration.")
            return False, "Invalid rule configuration: function not callable"

        try:
            # Unpacking stays inside the try so a wrong-shaped return is
            # reported as a failure rather than propagating.
            matched, payload = self.function(text)
        except Exception as exc:
            logger.exception(f"[Rule] '{self.name}' function raised exception.")
            return False, f"Rule function raised exception: {exc!r}"

        if isinstance(matched, bool):
            fallback = self.description if matched else "No match"
            # Legacy shape: (bool, str) is passed through untouched.
            if isinstance(payload, str):
                return matched, payload
            # Normalized shape: (bool, None).
            if payload is None:
                return matched, fallback
            # Normalized shape: (bool, dict); prefer the payload's 'note'.
            if isinstance(payload, dict):
                note = payload.get("note")
                if isinstance(note, str) and note:
                    return matched, note
                return matched, fallback

        logger.warning(f"[Rule] '{self.name}' function returned invalid types.")
        return False, "Invalid function return type; expected (bool, str | dict | None)"
|
||||
|
||||
|
||||
@dataclass
class RuleResult:
    """
    One rule's outcome in a uniform shape for UI/API consumers.

    'result' is the string "PASS" or "FAIL", where "FAIL" means the rule
    matched.
    """
    # Identity of the rule that produced this result
    name: str
    description: str
    category: str

    # Outcome: "PASS" | "FAIL"
    result: str

    # Optional details
    reason: Optional[str] = None
    severity: Optional[str] = None
    tags: Optional[List[str]] = None
|
||||
|
||||
|
||||
class RuleEngine:
    """
    Holds a registry of rules keyed by (category, name) and runs them
    against provided text.
    """

    def __init__(self, rules: Optional[List[Rule]] = None):
        """
        Args:
            rules: Optional initial rule list. Each entry goes through
                add_rule, so regex rules are compiled on registration.
        """
        # IMPORTANT: the public 'rules' property is backed by this private list.
        self._rules = []
        self._rule_keys = set()
        self._rule_index = {}

        for rule in rules or []:
            self.add_rule(rule)

    def _ensure_tracking(self) -> None:
        """Create any missing bookkeeping attributes and index existing rules."""
        if not hasattr(self, "_rules"):
            self._rules = []
        if hasattr(self, "_rule_keys") and hasattr(self, "_rule_index"):
            return
        # Rebuild both structures from the rule list so they cannot disagree.
        self._rule_index = {
            (existing.category, existing.name): position
            for position, existing in enumerate(self._rules)
        }
        self._rule_keys = set(self._rule_index)

    def add_rule(self, rule: Rule, replace: bool = False) -> None:
        """
        Add a new rule at runtime; compiles regex if needed and logs failures.

        Idempotent by (category, name):
          - If the same (category, name) is already present:
              * replace=False (default): ignore duplicate and warn.
              * replace=True: replace the existing rule in place and
                recompile regex.

        When settings.app.print_rule_loads is set, an info line with the
        resulting rule count is logged after the rule has been registered.

        Args:
            rule: Rule to add.
            replace: If True, overwrite an existing rule with the same
                (category, name).
        """
        self._ensure_tracking()

        key = (rule.category, rule.name)
        already_known = key in self._rule_keys

        if already_known and not replace:
            logger.warning("[Rules] Duplicate registration ignored: %s/%s", rule.category, rule.name)
            return

        position = self._rule_index.get(key) if already_known else None
        if position is not None:
            # Replace in place so the rule keeps its evaluation order.
            self._rules[position] = rule
            action = "replacing"
        else:
            self._rules.append(rule)
            self._rule_keys.add(key)
            self._rule_index[key] = len(self._rules) - 1
            action = "replacing" if already_known else "adding"

        if rule.rule_type == "regex" and not rule.compile_if_needed():
            logger.warning(
                "[Engine] Regex failed when %s rule '%s' (pattern=%r)",
                action, rule.name, getattr(rule, "pattern", None)
            )

        if settings.app.print_rule_loads:
            logger.info(
                "[engine] add_rule: %s/%s replace=%s -> count=%d",
                rule.category, rule.name, bool(replace), len(self._rules)
            )

    # helper, not used ATM
    def add_rules(self, rules: List[Rule], replace: bool = False) -> None:
        """
        Add many rules safely (idempotent). Uses the same semantics as add_rule.
        """
        for rule in rules:
            self.add_rule(rule, replace=replace)

    def run_all(self, text: str, category: Optional[str] = None) -> List[Dict]:
        """
        Run all rules against text.

        Args:
            text: The content to test.
            category: If provided, only evaluate rules that match this category.

        Returns:
            List of dicts (RuleResult fields) with PASS/FAIL per rule. The
            reason is included only for matched (FAIL) rules.
        """
        results: List[Dict] = []

        # Take one snapshot: the 'rules' property copies the list on every
        # access, so indexing through it inside the loop is quadratic.
        for rule in self.rules:
            if category is not None and rule.category != category:
                continue

            matched, reason = rule.run(text)

            outcome = RuleResult(
                name=rule.name,
                description=rule.description,
                category=rule.category,
                result="FAIL" if matched else "PASS",
                reason=reason if matched else None,
                severity=rule.severity,
                tags=rule.tags,
            )
            results.append(asdict(outcome))

        logger.debug(f"[Engine] Completed evaluation. Returned {len(results)} rule results.")
        return results

    @property
    def rules(self) -> List[Rule]:
        """Read-only view (returns a shallow copy) of registered rules."""
        return list(self._rules)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
    """
    Load rules from a YAML file.

    Supports optional 'severity' and 'tags' keys. Entries that are not
    mappings, or that lack 'name', 'description' or 'category', are skipped
    with a warning. 'type' defaults to 'regex'; 'tags' is kept only when it
    is a list.

    Example YAML:
      - name: suspicious_eval
        description: "Use of eval() in script"
        category: script
        type: regex
        pattern: "\\beval\\("
        severity: medium
        tags: [obfuscation]

    Args:
        yaml_file: Path to the YAML rules file.

    Returns:
        List[Rule]

    Raises:
        ValueError: if the top-level YAML value is not a list.
    """
    path = Path(yaml_file)
    with path.open("r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    if not isinstance(data, list):
        logger.error("[Loader] Rules YAML must be a list of rule objects.")
        raise ValueError("Rules YAML must be a list of rule objects.")

    rules: List[Rule] = []

    for idx, item in enumerate(data):
        # A scalar or nested list has no .get(); skip it instead of
        # aborting the whole load with an AttributeError.
        if not isinstance(item, dict):
            logger.warning(
                f"[Loader] Skipping invalid rule at index {idx}: expected a mapping, got {type(item).__name__}."
            )
            continue

        name = item.get("name")
        description = item.get("description")
        category = item.get("category")

        if not name or not description or not category:
            logger.warning(f"[Loader] Skipping invalid rule at index {idx}: missing required fields.")
            continue

        tags = item.get("tags")
        rules.append(
            Rule(
                name=name,
                description=description,
                category=category,
                rule_type=item.get("type", "regex"),
                pattern=item.get("pattern"),
                function=None,  # function rules should be registered in code
                severity=item.get("severity"),
                tags=tags if isinstance(tags, list) else None,
            )
        )

    return rules
|
||||
Reference in New Issue
Block a user