"""
app/rules/function_rules.py

Class-based adapters + function-based rules for SneakyScope.

Design:
- FactAdapter: converts text snippets into structured 'facts' dicts by category.
- FunctionRuleAdapter: wraps a rule function (expects dict facts) so it can be
  used directly by the RuleEngine even when the engine is given strings.

Each rule returns (matched: bool, reason: Optional[str]).
If matched is True, 'reason' should explain why.

Note:
- Form rules work today with text snippets, thanks to
  FunctionRuleAdapter+FactAdapter.
- Script rules expect per-script dict facts (src/base_hostname/etc.). They are
  registered now and will fully activate when you evaluate per-script contexts.
"""

from __future__ import annotations

from typing import Any, Dict, Optional
from urllib.parse import urlparse

_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}

# ---------------------------------------------------------------------------
# Adapters
# ---------------------------------------------------------------------------


class FactAdapter:
    """
    Converts raw text/html snippets into structured 'facts' suitable for
    function-based rules.

    Dict input is treated as already structured and handed back as the same
    object (a 'category' key is added in place when it is missing). String
    input is parsed according to the requested category. You can expand the
    per-category parsers over time as needed.
    """

    def __init__(self, logger: Optional[Any] = None) -> None:
        """
        Args:
            logger: Optional object exposing ``warning(msg)``. When omitted,
                unknown categories / unsupported inputs are handled silently.
        """
        self.logger = logger

    def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
        """
        Adapt text_or_facts (str or dict) into a facts dict.

        Args:
            text_or_facts: Either a raw string snippet or an already-structured
                dict.
            category: 'form' | 'script' | 'text' | ... (used to choose parser)

        Returns:
            A facts dict. For dict input this is the *same* dict object, with a
            'category' key added in place if it was missing. For any other
            input it is a new dict that always carries 'category' and 'raw'.
        """
        # Already structured: pass through. setdefault() only consults its
        # default when the key is absent, so an existing category is preserved.
        if isinstance(text_or_facts, dict):
            text_or_facts.setdefault("category", category or "")
            return text_or_facts

        # Anything that is neither dict nor str cannot be parsed; wrap it raw.
        if not isinstance(text_or_facts, str):
            if self.logger:
                self.logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
            return {"category": category, "raw": text_or_facts}

        # String snippets are parsed by category.
        if category == "form":
            return self._adapt_form_snippet(text_or_facts)

        if category == "script":
            # For now, script snippets are not parsed into facts. Script rules
            # expect per-script dicts (src/base_hostname/etc.), so only minimal
            # facts are returned here.
            return {"category": "script", "raw": text_or_facts}

        if category == "text":
            return {"category": "text", "raw": text_or_facts}

        if self.logger:
            self.logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
        return {"category": category, "raw": text_or_facts}

    # ---- Per-category parsers ----

    def _adapt_form_snippet(self, snippet: str) -> Dict[str, Any]:
        """
        Parse the simple line-oriented form snippet format, e.g.:

            action=https://example.com/post
            method=post
            inputs=
              - name=email type=text
              - name=password type=password

        Only extracts the fields needed by current function rules: 'action'
        and 'method'. Every other line is ignored. If a key appears on several
        lines, the last occurrence wins.

        Args:
            snippet: The raw form snippet text.

        Returns:
            Facts dict with 'category' ('form'), 'raw' (the original snippet),
            'action' / 'method' when present in the snippet, and 'base_url' /
            'base_hostname' defaulted to "" so callers can fill them in later.
        """
        facts: Dict[str, Any] = {"category": "form", "raw": snippet}

        for raw_line in snippet.splitlines():
            line = raw_line.strip()
            if line.startswith("action="):
                # Split once so '=' characters inside the URL are preserved.
                facts["action"] = line.split("=", 1)[1].strip()
            elif line.startswith("method="):
                facts["method"] = line.split("=", 1)[1].strip()

        # Normalize context keys expected by form rules.
        facts.setdefault("base_url", "")  # filled by caller later if desired
        facts.setdefault("base_hostname", "")  # filled by caller later if desired
        return facts


class FunctionRuleAdapter:
    """
    Callable wrapper that adapts engine input (str or dict) into 'facts' and
    then invokes the underlying function rule that expects a facts dict.

    Usage:
        wrapped = FunctionRuleAdapter(fn=form_action_missing, category="form",
                                      adapter=FactAdapter(app.logger))
        matched, reason = wrapped("action=https://...")  # engine-friendly
    """

    def __init__(self, fn, category: str = "", adapter: Optional[FactAdapter] = None) -> None:
        """
        Args:
            fn: Rule function taking a facts dict.
            category: Category passed to the adapter on every call.
            adapter: Adapter used to build facts; a logger-less FactAdapter is
                created when this is None.
        """
        self.fn = fn
        self.category = category
        # Compare against None explicitly so a caller-supplied adapter is never
        # replaced just because it happens to evaluate as falsy.
        self.adapter = adapter if adapter is not None else FactAdapter()

    def __call__(self, text_or_facts: Any):
        """Adapt the input into facts and return whatever the wrapped rule returns."""
        facts = self.adapter.adapt(text_or_facts, category=self.category)
        return self.fn(facts)

    def __repr__(self) -> str:
        fn_name = getattr(self.fn, "__name__", repr(self.fn))
        return f"{type(self).__name__}(fn={fn_name}, category={self.category!r})"


# ---------------------------------------------------------------------------
# Function-based rules (dict 'facts' expected)
# ---------------------------------------------------------------------------

# ---------------- Script rules ----------------