feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload

- Add SSL/TLS intelligence pipeline:
  - crt.sh lookup with expired-filtering and root-domain wildcard resolution
  - live TLS version/cipher probe with weak/legacy flags and probe notes
  - UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
  - Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
  - Auto-prepend `https://` for bare domains (e.g., `google.com`)
  - Optional quick HTTPS reachability + `http://` fallback
  - Provide singleton via function-cached `@singleton_loader`:
    - `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across `form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
  - Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
  - Adapt non-dict inputs to facts (category-aware and via provided adapter)
  - Return `(True, dict)` on match, `(False, None)` on miss
  - Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
  - Back rules by private `self._rules`; `rules` property returns copy
  - Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
  - Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
  - `app/rules/factory.py::build_rules_engine()` builds and logs totals
  - `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the SOF
  - `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
  - Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
  - `wire_logging_once(app)` clears and attaches a single handler chain
  - Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
  - Disable propagation to prevent dupes; include pid/logger name in format
  - Remove stray/duplicate handlers and import-time logging
  - Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
  - `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance, then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
  - `app/utils/urltools.py` (+) — URLNormalizer + `get_url_normalizer()`
  - `app/rules/function_rules.py` (±) — normalized payload returns
  - `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
  - `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
  - `app/rules/factory.py` (±) — pure builder; totals logged post-registration
  - `app/state.py` (+) — process-global rules engine
  - `app/logging_setup.py` (±) — single chain, two named loggers
  - `app/wsgi.py` (±) — preload build + `set_rules_engine()`
  - `entrypoint.sh` (±) — add `--preload`
  - templates (±) — TLS card, raw toggle; front-page checkbox

Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
2025-08-21 22:05:16 -05:00
parent f639ad0934
commit 693f7d67b9
22 changed files with 1476 additions and 256 deletions
--- a/app/rules/factory.py
+++ b/app/rules/factory.py
@@ -0,0 +1,51 @@
+# app/rules/factory.py
+from pathlib import Path
+
+from app.logging_setup import get_engine_logger
+from app.rules.rules_engine import RuleEngine
+from app.rules.rules_engine import Rule
+from app.rules.function_rules import FunctionRuleAdapter
+from app.rules.function_rules import (
+    form_action_missing, form_http_on_https_page, form_submits_to_different_host,
+    script_src_uses_data_or_blob, script_src_has_dangerous_extension, script_third_party_host,
+)
+
+from app.rules.rules_engine import load_rules_from_yaml  
+
+base_dir = Path(__file__).resolve().parent.parent
+RULES_FILE_PATH = base_dir / "config" / "suspicious_rules.yaml"
+
+log = get_engine_logger()
+
+def build_rules_engine() -> RuleEngine:
+    eng = RuleEngine()
+
+    # 1) YAML rules
+    yaml_rules = load_rules_from_yaml(RULES_FILE_PATH)
+    for r in yaml_rules:
+        eng.add_rule(r)
+    log.info("Found %d suspicious rules from %s",
+             len(yaml_rules), getattr(yaml_rules, "source_path", "config"))
+
+    # 2) Function rules
+    from app.rules.function_rules import FactAdapter
+    adapter = FactAdapter()
+
+    def add(rule: Rule):
+        eng.add_rule(rule)
+
+    add(Rule("form_action_missing", "Form has no action attribute", "form", "function",
+             FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter, rule_name="form_action_missing")))
+    add(Rule("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", "function",
+             FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page")))
+    add(Rule("form_submits_to_different_host", "Form submits to a different host", "form", "function",
+             FunctionRuleAdapter(form_submits_to_different_host, category="form", adapter=adapter, rule_name="form_submits_to_different_host")))
+    add(Rule("script_src_uses_data_or_blob", "Script src uses data:/blob: URL", "script", "function",
+             FunctionRuleAdapter(script_src_uses_data_or_blob, category="script", adapter=adapter, rule_name="script_src_uses_data_or_blob")))
+    add(Rule("script_src_has_dangerous_extension", "External script with dangerous extension", "script", "function",
+             FunctionRuleAdapter(script_src_has_dangerous_extension, category="script", adapter=adapter, rule_name="script_src_has_dangerous_extension")))
+    add(Rule("script_third_party_host", "Script is from a third-party host", "script", "function",
+             FunctionRuleAdapter(script_third_party_host, category="script", adapter=adapter, rule_name="script_third_party_host")))
+
+    log.info("Registered %d total rules (YAML + function)", len(eng.rules))
+    return eng
--- a/app/rules/function_rules.py
+++ b/app/rules/function_rules.py
@@ -19,10 +19,16 @@ Note:

 from __future__ import annotations

-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional, Tuple
+import inspect
+import logging
 from urllib.parse import urlparse

-_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}
+from app.logging_setup import get_app_logger
+
+app_logger = get_app_logger()
+
+_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:", "about:blank"}

 # ---------------------------------------------------------------------------
 # Adapters
@@ -36,9 +42,6 @@ class FactAdapter:
    You can expand the per-category parsers over time as needed.
    """

-    def __init__(self, logger: Optional[Any] = None) -> None:
-        self.logger = logger
-
    def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
        """
        Adapt text_or_facts (str or dict) into a facts dict.
@@ -65,13 +68,11 @@ class FactAdapter:
            elif category == "text":
                return {"category": "text", "raw": text_or_facts}
            else:
-                if self.logger:
-                    self.logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
+                app_logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
                return {"category": category, "raw": text_or_facts}

        # Fallback for unrecognized input types
-        if self.logger:
-            self.logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
+        app_logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
        return {"category": category, "raw": text_or_facts}

    # ---- Per-category parsers ----
@@ -109,23 +110,149 @@ class FactAdapter:

 class FunctionRuleAdapter:
    """
-    Callable wrapper that adapts engine input (str or dict) into 'facts' and then
-    invokes the underlying function rule that expects a facts dict.
+    Wraps a function-based rule so it ALWAYS returns:
+        - match:    (True,  Dict[str, Any])
+        - no match: (False, None)

-    Usage:
-        wrapped = FunctionRuleAdapter(fn=form_action_missing, category="form", adapter=FactAdapter(app.logger))
-        matched, reason = wrapped("action=https://...")  # engine-friendly
+    Also adapts non-dict inputs into facts via a provided 'adapter' using a
+    duck-typed protocol, so callers can pass raw items (e.g., strings/nodes).
    """

-    def __init__(self, fn, category: str = "", adapter: Optional[FactAdapter] = None) -> None:
+    def __init__(
+        self,
+        fn: Callable[[Dict[str, Any]], Any],
+        category: str,
+        adapter: Optional[Any] = None,
+        rule_name: Optional[str] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
        self.fn = fn
        self.category = category
-        self.adapter = adapter or FactAdapter()
+        self.adapter = adapter
+        self.rule_name = rule_name or getattr(fn, "__name__", "<anonymous>")
+               

-    def __call__(self, text_or_facts: Any):
-        facts = self.adapter.adapt(text_or_facts, category=self.category)
-        return self.fn(facts)
+    # ---------- helpers ----------

+    def _adapt_to_facts(self, raw: Any) -> Optional[Dict[str, Any]]:
+        """
+        Convert whatever the engine passed into a facts dict.
+        Tries the provided adapter using a duck-typed protocol.
+        Returns a dict, or None if we can't adapt.
+        """
+        # Already a dict? Use it.
+        if isinstance(raw, dict):
+            return raw
+
+        # Try adapter if provided
+        if self.adapter is not None:
+            # Preferred generic signatures
+            for meth in ("build_facts", "facts", "to_facts"):
+                fn = getattr(self.adapter, meth, None)
+                if callable(fn):
+                    try:
+                        facts = fn(self.category, raw)
+                        if isinstance(facts, dict):
+                            return facts
+                    except Exception as exc:
+                        app_logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc)
+
+            # Category-specific fallbacks: build_<category>_facts / <category>_facts
+            cands = (f"build_{self.category}_facts", f"{self.category}_facts")
+            for meth in cands:
+                fn = getattr(self.adapter, meth, None)
+                if callable(fn):
+                    try:
+                        facts = fn(raw)
+                        if isinstance(facts, dict):
+                            return facts
+                    except Exception as exc:
+                        app_logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc)
+
+        # No way to adapt
+        return None
+
+    def _coerce_return(self, outcome: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
+        """
+        Normalize rule function returns:
+
+          accepted:
+            (bool, dict|None)
+            (bool, str)          -> dict {'note': str} on match
+            (bool,) or bool      -> (bool, None)
+
+          On invalid shapes, treat as no-match.
+        """
+        # Exact 2-tuple
+        if isinstance(outcome, tuple) and len(outcome) == 2:
+            matched = bool(outcome[0])
+            raw = outcome[1]
+
+            if not matched:
+                return False, None
+
+            if raw is None:
+                return True, {}  # match with empty payload is fine
+            if isinstance(raw, dict):
+                return True, raw
+            if isinstance(raw, str):
+                return True, {"note": raw}
+
+            app_logger.warning("[Rule] '%s' returned payload of invalid type: %s",
+                             self.rule_name, type(raw).__name__)
+            # Still treat as match but give minimal payload
+            return True, {"note": "coerced-invalid-payload", "value_repr": repr(raw)}
+
+        # Legacy: (bool,) or bare bool
+        if isinstance(outcome, tuple) and len(outcome) == 1 and isinstance(outcome[0], bool):
+            return (True, {}) if outcome[0] else (False, None)
+        if isinstance(outcome, bool):
+            return (True, {}) if outcome else (False, None)
+
+        # Junk -> no match
+        app_logger.warning("[Rule] '%s' returned invalid shape: %s",
+                         self.rule_name, type(outcome).__name__)
+        return False, None
+
+    # ---------- callable ----------
+
+    def __call__(self, raw: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
+        """
+        Apply the wrapped rule to the provided item (raw or facts).
+        Returns:
+            (True,  dict) on match
+            (False, None) on no match
+        """
+        facts = self._adapt_to_facts(raw)
+        if facts is None:
+            app_logger.warning("[Rule] '%s' received non-dict facts (%s). Coercing to miss.",
+                             self.rule_name, type(raw).__name__)
+            return False, None
+
+        try:
+            outcome = self.fn(facts)
+        except Exception as exc:
+            app_logger.exception("[Rule] '%s' raised: %s", self.rule_name, exc)
+            return False, None
+
+        matched, payload = self._coerce_return(outcome)
+        return matched, payload
+
+
+def _hit(payload: Optional[Dict[str, Any]] = None) -> Tuple[bool, Optional[Dict[str, Any]]]:
+    """
+    Standardize a positive match result: (True, dict)
+    """
+    if payload is None:
+        payload = {}
+    return True, payload
+
+
+def _miss() -> Tuple[bool, Optional[Dict[str, Any]]]:
+    """
+    Standardize a negative match result: (False, None)
+    """
+    return False, None

 # ---------------------------------------------------------------------------
 # Function-based rules (dict 'facts' expected)
@@ -133,20 +260,25 @@ class FunctionRuleAdapter:

 # ---------------- Script rules ----------------

-def script_src_uses_data_or_blob(facts: Dict[str, Any]):
+def script_src_uses_data_or_blob(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Flags <script> tags with src='data:' or 'blob:'."""
    src = facts.get("src") or ""
    if isinstance(src, str) and src.startswith(("data:", "blob:")):
        scheme = src.split(":", 1)[0]
-        return True, f"Script src uses {scheme}: URL"
-    return False, None
+        return _hit({
+            "scheme": scheme,
+            "src": src,
+            "note": f"Script src uses {scheme}: URL"
+        })
+    return _miss()


-def script_src_has_dangerous_extension(facts: Dict[str, Any]):
+def script_src_has_dangerous_extension(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Flags <script> tags with dangerous file extensions (e.g., .vbs, .hta)."""
    src = facts.get("src") or ""
    if not isinstance(src, str):
-        return False, None
+        return _miss()
+
    low = src.lower()
    dangerous = (".vbs", ".hta")
    i = 0
@@ -154,31 +286,43 @@ def script_src_has_dangerous_extension(facts: Dict[str, Any]):
    while i < m:
        ext = dangerous[i]
        if low.endswith(ext):
-            return True, f"External script has dangerous extension ({ext})"
+            return _hit({
+                "ext": ext,
+                "src": src,
+                "note": f"External script has dangerous extension ({ext})"
+            })
        i = i + 1
-    return False, None
+
+    return _miss()


-def script_third_party_host(facts: Dict[str, Any]):
+def script_third_party_host(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Flags scripts loaded from a different hostname than the page."""
    base_host = facts.get("base_hostname") or ""
    src_host = facts.get("src_hostname") or ""
    if base_host and src_host and base_host != src_host:
-        return True, f"Third-party script host: {src_host}"
-    return False, None
+        return _hit({
+            "base_host": base_host,
+            "src_host": src_host,
+            "note": f"Third-party script host: {src_host}"
+        })
+    return _miss()


 # ---------------- Form rules ----------------

-def form_action_missing(facts: Dict[str, Any]):
+def form_action_missing(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Flags <form> elements with no meaningful action attribute."""
    action = (facts.get("action") or "").strip()
    if action in _NOOP_ACTIONS:
-        return True, "Form has no action attribute (or uses a no-op action)"
-    return False, None
+        return _hit({
+            "action": action,
+            "note": "Form has no action attribute (or uses a no-op action)"
+        })
+    return _miss()


-def form_http_on_https_page(facts: Dict[str, Any]):
+def form_http_on_https_page(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Flags forms submitting over HTTP while the page was loaded over HTTPS."""
    base_url = (facts.get("base_url") or "").strip()
    action   = (facts.get("action") or "").strip()
@@ -188,30 +332,38 @@ def form_http_on_https_page(facts: Dict[str, Any]):
        parsed_act  = urlparse(action)
        act_scheme  = (parsed_act.scheme or "").lower()
    except Exception:
-        return False, None  # parsing trouble → don’t flag
+        return _miss()  # parsing trouble → don’t flag

    # Only flag absolute http:// actions on https pages.
-    # Relative or schemeless ('//host/...') isn’t flagged here (it won’t be HTTP on an HTTPS page).
    if base_scheme == "https" and act_scheme == "http":
-        return True, f"Submits over insecure HTTP (action={parsed_act.geturl()})"
-    return False, None
+        return _hit({
+            "base_url": base_url,
+            "action": parsed_act.geturl(),
+            "note": "Submits over insecure HTTP"
+        })
+    return _miss()


-def form_submits_to_different_host(facts: Dict[str, Any]):
+def form_submits_to_different_host(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Flags <form> actions that submit to a different hostname than the page."""
    base_host = (facts.get("base_hostname") or "").strip().lower()
    action    = (facts.get("action") or "").strip()

    if not action or action in _NOOP_ACTIONS:
-        return False, None
+        return _miss()

    try:
        parsed = urlparse(action)
        act_host = (parsed.hostname or "").lower()
    except Exception:
-        return False, None
+        return _miss()

    # Only compare when the action specifies a host (absolute URL or schemeless //host/path).
    if act_host and base_host and act_host != base_host:
-        return True, f"Submits to a different host ({act_host} vs {base_host})"
-    return False, None
+        return _hit({
+            "base_host": base_host,
+            "act_host": act_host,
+            "action": action,
+            "note": "Submits to a different host"
+        })
+    return _miss()
--- a/app/rules/rules_engine.py
+++ b/app/rules/rules_engine.py
@@ -0,0 +1,352 @@
+"""
+rules_engine.py
+"""
+
+import re
+import logging
+from dataclasses import dataclass, asdict, field
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
+from app.logging_setup import get_engine_logger
+from app.utils.settings import get_settings
+
+settings = get_settings()
+
+import yaml
+
+try:
+    # Flask is optional; engine still works without it.
+    from flask import current_app, has_app_context
+except Exception:
+    current_app = None  # type: ignore
+    def has_app_context() -> bool:  # type: ignore
+        return False
+
+logger = get_engine_logger()
+
+@dataclass
+class Rule:
+    """
+    Represents a single detection rule.
+
+    When rule_type == 'regex', 'pattern' must be provided.
+    When rule_type == 'function', 'function' must be provided and return (matched: bool, reason: str).
+    """
+    name: str
+    description: str
+    category: str
+    rule_type: str = "regex"
+    pattern: Optional[str] = None
+    function: Optional[Callable[[str], Tuple[bool, str]]] = None
+    severity: Optional[str] = None             # 'low' | 'medium' | 'high' (optional)
+    tags: Optional[List[str]] = field(default=None)  # e.g., ['obfuscation', 'phishing'] (optional)
+
+    # Internal compiled regex cache (not serialized)
+    _compiled_regex: Optional[re.Pattern] = field(default=None, repr=False, compare=False)
+
+    def compile_if_needed(self) -> bool:
+        """
+        Compile the regex pattern once for performance, if applicable.
+
+        Returns:
+            bool: True if the regex is compiled and ready, False otherwise.
+        """
+
+        if self.rule_type == "regex" and self.pattern:
+            try:
+                self._compiled_regex = re.compile(self.pattern, re.IGNORECASE)
+                logger.debug(f"[Rule] Compiled regex for '{self.name}'")
+                return True
+            except re.error as rex:
+                self._compiled_regex = None
+                logger.warning(f"[Rule] Failed to compile regex for '{self.name}': {rex}")
+                return False
+        return False
+
+    def run(self, text: str) -> Tuple[bool, str]:
+        """
+        Run the rule on the given text.
+
+        Returns:
+            (matched: bool, reason: str)
+        """
+        if self.rule_type == "regex":
+            if not self.pattern:
+                logger.warning(f"[Rule] '{self.name}' missing regex pattern.")
+                return False, "Invalid rule configuration: missing pattern"
+
+            if self._compiled_regex is None:
+                compiled_ok = self.compile_if_needed()
+                if not compiled_ok:
+                    return False, f"Invalid regex pattern: {self.pattern!r}"
+
+            if self._compiled_regex and self._compiled_regex.search(text):
+                return True, f"Matched regex '{self.pattern}' → {self.description}"
+            return False, "No match"
+
+        if self.rule_type == "function":
+            if callable(self.function):
+                try:
+                    matched, reason = self.function(text)
+                    if isinstance(matched, bool) and isinstance(reason, str):
+                        return matched, reason
+                    logger.warning(f"[Rule] '{self.name}' function returned invalid types.")
+                    return False, "Invalid function return type; expected (bool, str)"
+                except Exception as exc:
+                    logger.exception(f"[Rule] '{self.name}' function raised exception.")
+                    return False, f"Rule function raised exception: {exc!r}"
+            logger.warning(f"[Rule] '{self.name}' has invalid function configuration.")
+            return False, "Invalid rule configuration: function not callable"
+
+        logger.warning(f"[Rule] '{self.name}' has unknown type '{self.rule_type}'.")
+        return False, f"Invalid rule configuration: unknown type '{self.rule_type}'"
+
+
+@dataclass
+class RuleResult:
+    """
+    Uniform per-rule outcome for UI/API consumption.
+
+    result is "PASS" or "FAIL" (FAIL == matched True)
+    """
+    name: str
+    description: str
+    category: str
+    result: str                  # "PASS" | "FAIL"
+    reason: Optional[str] = None
+    severity: Optional[str] = None
+    tags: Optional[List[str]] = None
+
+
+class RuleEngine:
+    """
+    Loads and executes rules against provided text, with Flask-aware logging.
+    """
+
+    def __init__(self, rules: Optional[List[Rule]] = None):
+        """
+        Args:
+            rules: Optional initial rule list.
+        """
+
+        # IMPORTANT: back the property with a private list
+        self._rules = []                 # was: self.rules = []
+        self._rule_keys = set()
+        self._rule_index = {}
+
+        # If a list of rules was provided, add them via add_rule so compilation happens
+        if rules:
+            i = 0
+            n = len(rules)
+            while i < n:
+                self.add_rule(rules[i])  # compiles regex as needed
+                i = i + 1
+
+    def add_rule(self, rule: Rule, replace: bool = False) -> None:
+        """
+        Add a new rule at runtime; compiles regex if needed and logs failures.
+
+        Idempotent by (category, name):
+        - If the same (category, name) is already present:
+            * replace=False (default): ignore duplicate and warn.
+            * replace=True: replace the existing rule in place and recompile regex.
+
+        Args:
+            rule:    Rule to add.
+            replace: If True, overwrite an existing rule with the same (category, name).
+        """
+        # Ensure tracking structures exist in case __init__ wasn’t updated somewhere
+        if not hasattr(self, "_rule_keys"):
+            self._rule_keys = set()
+        if not hasattr(self, "_rule_index"):
+            self._rule_index = {}
+            i = 0
+            length = len(getattr(self, "_rules", []))
+            while i < length:
+                existing = self._rules[i]
+                key_i = (existing.category, existing.name)
+                self._rule_keys.add(key_i)
+                self._rule_index[key_i] = i
+                i = i + 1
+
+        key = (rule.category, rule.name)
+
+        if key in self._rule_keys:
+            if not replace:
+                try:
+                    logger.warning("[Rules] Duplicate registration ignored: %s/%s", rule.category, rule.name)
+                except Exception:
+                    pass
+                return
+
+            # Replace existing rule in place
+            idx = self._rule_index.get(key)
+            if idx is None:
+                idx = len(self._rules)
+                self._rules.append(rule)
+                self._rule_index[key] = idx
+            else:
+                self._rules[idx] = rule
+
+            if rule.rule_type == "regex":
+                compiled_ok = rule.compile_if_needed()
+                if not compiled_ok:
+                    logger.warning(
+                        "[Engine] Regex failed when replacing rule '%s' (pattern=%r)",
+                        rule.name, getattr(rule, "pattern", None)
+                    )
+            return
+        
+        if settings.app.print_rule_loads:
+            logger.info(
+                    "[engine] add_rule: %s/%s replace=%s -> count=%d",
+                    rule.category, rule.name, bool(replace), len(self._rules)
+                )
+            
+        # New rule path
+        self._rules.append(rule)
+        self._rule_keys.add(key)
+        self._rule_index[key] = len(self._rules) - 1
+
+        if rule.rule_type == "regex":
+            compiled_ok = rule.compile_if_needed()
+            if not compiled_ok:
+                logger.warning(
+                    "[Engine] Regex failed when adding rule '%s' (pattern=%r)",
+                    rule.name, getattr(rule, "pattern", None)
+                )
+
+                
+
+    # helper, not used ATM
+    def add_rules(self, rules: list[Rule], replace: bool = False) -> None:
+        """
+        Add many rules safely (idempotent). Uses the same semantics as add_rule.
+        """
+        i = 0
+        n = len(rules)
+        while i < n:
+            self.add_rule(rules[i], replace=replace)
+            i = i + 1
+
+    def run_all(self, text: str, category: Optional[str] = None) -> List[Dict]:
+        """
+        Run all rules against text.
+
+        Args:
+            text: The content to test.
+            category: If provided, only evaluate rules that match this category.
+
+        Returns:
+            List of dicts with PASS/FAIL per rule (JSON-serializable).
+        """
+        results: List[Dict] = []
+
+        index = 0
+        total = len(self.rules)
+        while index < total:
+            rule = self.rules[index]
+
+            if category is not None and rule.category != category:
+                index = index + 1
+                continue
+
+            matched, reason = rule.run(text)
+
+            result_str = "FAIL" if matched else "PASS"
+            reason_to_include: Optional[str]
+            if matched:
+                reason_to_include = reason
+            else:
+                reason_to_include = None
+
+            rr = RuleResult(
+                name=rule.name,
+                description=rule.description,
+                category=rule.category,
+                result=result_str,
+                reason=reason_to_include,
+                severity=rule.severity,
+                tags=rule.tags,
+            )
+
+            results.append(asdict(rr))
+            index = index + 1
+
+        logger.debug(f"[Engine] Completed evaluation. Returned {len(results)} rule results.")
+        return results
+
+    @property
+    def rules(self) -> List[Rule]:
+        """Read-only view (returns a shallow copy) of registered rules."""
+        return list(self._rules)
+
+
+
+
+
+
+
+def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
+    """
+    Load rules from a YAML file.
+
+    Supports optional 'severity' and 'tags' keys.
+
+    Example YAML:
+      - name: suspicious_eval
+        description: "Use of eval() in script"
+        category: script
+        type: regex
+        pattern: "\\beval\\("
+        severity: medium
+        tags: [obfuscation]
+
+    Returns:
+        List[Rule]
+    """
+    
+    rules: List[Rule] = []
+
+    path = Path(yaml_file)
+    with path.open("r", encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+
+    if not isinstance(data, list):
+        logger.error("[Loader] Rules YAML must be a list of rule objects.")
+        raise ValueError("Rules YAML must be a list of rule objects.")
+
+    idx = 0
+    total = len(data)
+    while idx < total:
+        item = data[idx]
+
+        name = item.get("name")
+        description = item.get("description")
+        category = item.get("category")
+        rule_type = item.get("type", "regex")
+        pattern = item.get("pattern")
+        severity = item.get("severity")
+        tags = item.get("tags")
+
+        if not name or not description or not category:
+            logger.warning(f"[Loader] Skipping invalid rule at index {idx}: missing required fields.")
+            idx = idx + 1
+            continue
+
+        rule = Rule(
+            name=name,
+            description=description,
+            category=category,
+            rule_type=rule_type,
+            pattern=pattern,
+            function=None,  # function rules should be registered in code
+            severity=severity,
+            tags=tags if isinstance(tags, list) else None,
+        )
+
+        rules.append(rule)
+        idx = idx + 1
+
+    return rules