""" app/rules/function_rules.py Class-based adapters + function-based rules for SneakyScope. Design: - FactAdapter: converts text snippets into structured 'facts' dicts by category. - FunctionRuleAdapter: wraps a rule function (expects dict facts) so it can be used directly by the RuleEngine even when the engine is given strings. Each rule returns (matched: bool, reason: Optional[str]). If matched is True, 'reason' should explain why. Note: - Form rules work today with text snippets, thanks to FunctionRuleAdapter+FactAdapter. - Script rules expect per-script dict facts (src/base_hostname/etc.). They are registered now and will fully activate when you evaluate per-script contexts. """ from __future__ import annotations from typing import Any, Callable, Dict, Optional, Tuple import inspect import logging from urllib.parse import urlparse from app.logging_setup import get_app_logger app_logger = get_app_logger() _NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:", "about:blank"} # --------------------------------------------------------------------------- # Adapters # --------------------------------------------------------------------------- class FactAdapter: """ Converts raw text/html snippets into structured 'facts' suitable for function-based rules. If input is already a dict, returns it unchanged. You can expand the per-category parsers over time as needed. """ def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]: """ Adapt text_or_facts (str or dict) into a facts dict. Args: text_or_facts: Either raw string snippet or an already-structured dict. category: 'form' | 'script' | 'text' | ... (used to choose parser) """ # Already structured — pass through if isinstance(text_or_facts, dict): # Ensure a category key for consistency (optional) text_or_facts.setdefault("category", category or text_or_facts.get("category") or "") return text_or_facts # String snippets are parsed by category if isinstance(text_or_facts, str): if category == "form": return self._adapt_form_snippet(text_or_facts) elif category == "script": # For now, we don't parse script snippets into facts. Script rules expect # per-script dicts (src/base_hostname/etc.), which you'll provide when you # add per-script evaluation. Return minimal facts for safety. return {"category": "script", "raw": text_or_facts} elif category == "text": return {"category": "text", "raw": text_or_facts} else: app_logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.") return {"category": category, "raw": text_or_facts} # Fallback for unrecognized input types app_logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}") return {"category": category, "raw": text_or_facts} # ---- Per-category parsers ---- def _adapt_form_snippet(self, snippet: str) -> Dict[str, Any]: """ Parse the simple form snippet format used by browser.py today, e.g.: action=https://example.com/post method=post inputs= - name=email type=text - name=password type=password Only extracts fields needed by current function rules. """ facts: Dict[str, Any] = {"category": "form", "raw": snippet} lines = snippet.splitlines() i = 0 n = len(lines) while i < n: line = (lines[i] or "").strip() if line.startswith("action="): facts["action"] = line.split("=", 1)[1].strip() elif line.startswith("method="): facts["method"] = line.split("=", 1)[1].strip() i = i + 1 # Normalize context keys expected by form rules facts.setdefault("base_url", "") # filled by caller later if desired facts.setdefault("base_hostname", "") # filled by caller later if desired return facts class FunctionRuleAdapter: """ Wraps a function-based rule so it ALWAYS returns: - match: (True, Dict[str, Any]) - no match: (False, None) Also adapts non-dict inputs into facts via a provided 'adapter' using a duck-typed protocol, so callers can pass raw items (e.g., strings/nodes). """ def __init__( self, fn: Callable[[Dict[str, Any]], Any], category: str, adapter: Optional[Any] = None, rule_name: Optional[str] = None, logger: Optional[logging.Logger] = None, ): self.fn = fn self.category = category self.adapter = adapter self.rule_name = rule_name or getattr(fn, "__name__", "") # ---------- helpers ---------- def _adapt_to_facts(self, raw: Any) -> Optional[Dict[str, Any]]: """ Convert whatever the engine passed into a facts dict. Tries the provided adapter using a duck-typed protocol. Returns a dict, or None if we can't adapt. """ # Already a dict? Use it. if isinstance(raw, dict): return raw # Try adapter if provided if self.adapter is not None: # Preferred generic signatures for meth in ("build_facts", "facts", "to_facts"): fn = getattr(self.adapter, meth, None) if callable(fn): try: facts = fn(self.category, raw) if isinstance(facts, dict): return facts except Exception as exc: app_logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc) # Category-specific fallbacks: build__facts / _facts cands = (f"build_{self.category}_facts", f"{self.category}_facts") for meth in cands: fn = getattr(self.adapter, meth, None) if callable(fn): try: facts = fn(raw) if isinstance(facts, dict): return facts except Exception as exc: app_logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc) # No way to adapt return None def _coerce_return(self, outcome: Any) -> Tuple[bool, Optional[Dict[str, Any]]]: """ Normalize rule function returns: accepted: (bool, dict|None) (bool, str) -> dict {'note': str} on match (bool,) or bool -> (bool, None) On invalid shapes, treat as no-match. """ # Exact 2-tuple if isinstance(outcome, tuple) and len(outcome) == 2: matched = bool(outcome[0]) raw = outcome[1] if not matched: return False, None if raw is None: return True, {} # match with empty payload is fine if isinstance(raw, dict): return True, raw if isinstance(raw, str): return True, {"note": raw} app_logger.warning("[Rule] '%s' returned payload of invalid type: %s", self.rule_name, type(raw).__name__) # Still treat as match but give minimal payload return True, {"note": "coerced-invalid-payload", "value_repr": repr(raw)} # Legacy: (bool,) or bare bool if isinstance(outcome, tuple) and len(outcome) == 1 and isinstance(outcome[0], bool): return (True, {}) if outcome[0] else (False, None) if isinstance(outcome, bool): return (True, {}) if outcome else (False, None) # Junk -> no match app_logger.warning("[Rule] '%s' returned invalid shape: %s", self.rule_name, type(outcome).__name__) return False, None # ---------- callable ---------- def __call__(self, raw: Any) -> Tuple[bool, Optional[Dict[str, Any]]]: """ Apply the wrapped rule to the provided item (raw or facts). Returns: (True, dict) on match (False, None) on no match """ facts = self._adapt_to_facts(raw) if facts is None: app_logger.warning("[Rule] '%s' received non-dict facts (%s). Coercing to miss.", self.rule_name, type(raw).__name__) return False, None try: outcome = self.fn(facts) except Exception as exc: app_logger.exception("[Rule] '%s' raised: %s", self.rule_name, exc) return False, None matched, payload = self._coerce_return(outcome) return matched, payload def _hit(payload: Optional[Dict[str, Any]] = None) -> Tuple[bool, Optional[Dict[str, Any]]]: """ Standardize a positive match result: (True, dict) """ if payload is None: payload = {} return True, payload def _miss() -> Tuple[bool, Optional[Dict[str, Any]]]: """ Standardize a negative match result: (False, None) """ return False, None # --------------------------------------------------------------------------- # Function-based rules (dict 'facts' expected) # --------------------------------------------------------------------------- # ---------------- Script rules ---------------- def script_src_uses_data_or_blob(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]: """Flags