Files
SneakyScope/app/rules/function_rules.py
Phillip Tarrant 3a24b392f2 feat: on-demand external script analysis + code viewer; refactor form analysis to rule engine
- API: add `POST /api/analyze_script` (app/blueprints/api.py)
  - Fetch one external script to artifacts, run rules, return findings + snippet
  - Uses new ExternalScriptFetcher (results_path aware) and job UUID
  - Returns: { ok, final_url, status_code, bytes, truncated, sha256, artifact_path, findings[], snippet, snippet_len }
  - TODO: document in openapi/openapi.yaml

- Fetcher: update `app/utils/external_fetch.py`
  - Constructed with `results_path` (UUID dir); writes to `<results_path>/scripts/fetched/<index>.js`
  - Loads settings via `get_settings()`, logs via std logging

- UI (results.html):
  - Move “Analyze external script” action into **Content Snippet** column for external rows
  - Clicking replaces button with `<details>` snippet, shows rule matches, and adds “open in viewer” link
  - Robust fetch handler (checks JSON, shows errors); builds viewer URL from absolute artifact path

- Viewer:
  - New route: `GET /view/artifact/<run_uuid>/<path:filename>` (app/blueprints/ui.py)
  - New template: Monaco-based read-only code viewer (viewer.html)
  - Removes SRI on loader to avoid integrity block; loads file via `raw_url` and detects language by extension

- Forms:
  - Refactor `analyze_forms` to mirror scripts analysis:
    - Uses rule engine (`category == "form"`) across regex/function rules
    - Emits rows only when matches exist
    - Includes `content_snippet`, `action`, `method`, `inputs`, `rules`
  - Replace legacy plumbing (`flagged`, `flag_reasons`, `status`) in output
  - Normalize form function rules to canonical returns `(bool, Optional[str])`:
    - `form_action_missing`
    - `form_http_on_https_page`
    - `form_submits_to_different_host`
    - Add minor hardening (lowercasing hosts, no-op actions, clearer reasons)

- CSS: add `.forms-table` to mirror `.scripts-table` (5 columns)
  - Fixed table layout, widths per column, chip/snippet styling, responsive tweaks

- Misc:
  - Fix “working outside app context” issue by avoiding `current_app` at import time (left storage logic inside routes)
  - Add “View Source” link to open page source in viewer

Refs:
- Roadmap: mark “Source code viewer” done; keep TODO to add `/api/analyze_script` to OpenAPI
2025-08-21 15:32:24 -05:00

217 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
app/rules/function_rules.py
Class-based adapters + function-based rules for SneakyScope.
Design:
- FactAdapter: converts text snippets into structured 'facts' dicts by category.
- FunctionRuleAdapter: wraps a rule function (expects dict facts) so it can be
used directly by the RuleEngine even when the engine is given strings.
Each rule returns (matched: bool, reason: Optional[str]).
If matched is True, 'reason' should explain why.
Note:
- Form rules work today with text snippets, thanks to FunctionRuleAdapter+FactAdapter.
- Script rules expect per-script dict facts (src/base_hostname/etc.). They are
registered now and will fully activate when you evaluate per-script contexts.
"""
from __future__ import annotations
from typing import Any, Dict, Optional
from urllib.parse import urlparse
# Form `action` values that the form rules in this module treat as "no real
# submission target": an empty action, a bare fragment, or a javascript no-op.
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}
# ---------------------------------------------------------------------------
# Adapters
# ---------------------------------------------------------------------------
class FactAdapter:
    """
    Converts raw text/html snippets into structured 'facts' suitable for
    function-based rules.

    Dict input is returned as the *same object*; the only change made to it is
    that a missing "category" key is filled in. String input is parsed
    according to ``category``. Any other input type is wrapped as-is under the
    "raw" key.

    You can expand the per-category parsers over time as needed.
    """

    def __init__(self, logger: Optional[Any] = None) -> None:
        """
        Args:
            logger: Optional object exposing ``warning(msg)``; when falsy,
                nothing is logged.
        """
        self.logger = logger

    def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
        """
        Adapt text_or_facts (str or dict) into a facts dict.

        Args:
            text_or_facts: Either raw string snippet or an already-structured dict.
            category: 'form' | 'script' | 'text' | ... (used to choose parser)

        Returns:
            A facts dict. For dict input this is the caller's own dict.
        """
        # Already structured — pass through. setdefault never overwrites an
        # existing "category", so only a missing key is filled in.
        if isinstance(text_or_facts, dict):
            text_or_facts.setdefault("category", category or "")
            return text_or_facts

        # String snippets are parsed by category.
        if isinstance(text_or_facts, str):
            if category == "form":
                return self._adapt_form_snippet(text_or_facts)
            # Script and text snippets are not parsed into facts yet: script
            # rules expect per-script dicts (src/base_hostname/etc.), so only
            # minimal facts are returned. Unknown categories get the same
            # shape, plus a warning.
            if category not in ("script", "text") and self.logger:
                self.logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
            return {"category": category, "raw": text_or_facts}

        # Fallback for unrecognized input types.
        if self.logger:
            self.logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
        return {"category": category, "raw": text_or_facts}

    # ---- Per-category parsers ----
    def _adapt_form_snippet(self, snippet: str) -> Dict[str, Any]:
        """
        Parse the simple form snippet format used by browser.py today, e.g.:

            action=https://example.com/post
            method=post
            inputs=
            - name=email type=text
            - name=password type=password

        Only extracts fields needed by current function rules ("action" and
        "method"); every other line is ignored. If a key appears on several
        lines, the last occurrence wins.
        """
        facts: Dict[str, Any] = {"category": "form", "raw": snippet}
        for raw_line in snippet.splitlines():
            # Split on the first '=' only, so values containing '=' (e.g. a
            # query string in the action URL) stay intact.
            key, sep, value = raw_line.strip().partition("=")
            if sep and key in ("action", "method"):
                facts[key] = value.strip()

        # Normalize context keys expected by form rules.
        facts.setdefault("base_url", "")  # filled by caller later if desired
        facts.setdefault("base_hostname", "")  # filled by caller later if desired
        return facts
class FunctionRuleAdapter:
    """
    Engine-facing callable around a function rule.

    The wrapped rule function expects a facts dict; this wrapper accepts
    whatever the engine hands it (str or dict), converts it to facts through
    the configured adapter, and then calls the rule.

    Usage:
        wrapped = FunctionRuleAdapter(fn=form_action_missing, category="form", adapter=FactAdapter(app.logger))
        matched, reason = wrapped("action=https://...")  # engine-friendly
    """

    def __init__(self, fn, category: str = "", adapter: Optional[FactAdapter] = None) -> None:
        """
        Args:
            fn: Rule function taking a facts dict.
            category: Category passed to the adapter on every call.
            adapter: Fact adapter to use; a default FactAdapter() is created
                when none (or a falsy one) is supplied.
        """
        if not adapter:
            adapter = FactAdapter()
        self.fn = fn
        self.category = category
        self.adapter = adapter

    def __call__(self, text_or_facts: Any):
        """Adapt the input into facts and return whatever the wrapped rule returns."""
        return self.fn(self.adapter.adapt(text_or_facts, category=self.category))
# ---------------------------------------------------------------------------
# Function-based rules (dict 'facts' expected)
# ---------------------------------------------------------------------------
# ---------------- Script rules ----------------
def script_src_uses_data_or_blob(facts: Dict[str, Any]):
    """Flags <script> tags with src='data:' or 'blob:'.

    Args:
        facts: Per-script facts; only the "src" key is read.

    Returns:
        (True, reason) when src starts with a data: or blob: scheme,
        otherwise (False, None). A missing or non-string src never matches.
    """
    src = facts.get("src") or ""
    if not isinstance(src, str):
        return False, None

    # URL schemes are case-insensitive and leading whitespace is ignored when
    # a URL is parsed, so normalise before matching; otherwise 'DATA:' or
    # ' blob:' would slip past the rule.
    normalized = src.strip().lower()
    for scheme in ("data", "blob"):
        if normalized.startswith(scheme + ":"):
            return True, f"Script src uses {scheme}: URL"
    return False, None
def script_src_has_dangerous_extension(facts: Dict[str, Any]):
    """Flags <script> tags with dangerous file extensions (e.g., .vbs, .hta).

    The extension is checked on the URL *path*, so a query string or fragment
    can neither hide it ('evil.vbs?v=1') nor fake it ('app.js?next=a.vbs').

    Args:
        facts: Per-script facts; only the "src" key is read.

    Returns:
        (True, reason) when the path ends in a dangerous extension,
        otherwise (False, None). A missing or non-string src never matches.
    """
    src = facts.get("src") or ""
    if not isinstance(src, str):
        return False, None

    candidate = src.strip().lower()
    try:
        path = urlparse(candidate).path
    except ValueError:
        # Unparseable URL (e.g. malformed IPv6 literal): fall back to the raw
        # string rather than silently skipping the check.
        path = candidate

    for ext in (".vbs", ".hta"):
        if path.endswith(ext):
            return True, f"External script has dangerous extension ({ext})"
    return False, None
def script_third_party_host(facts: Dict[str, Any]):
    """Flags scripts loaded from a different hostname than the page.

    Args:
        facts: Per-script facts; reads "base_hostname" (the page's host) and
            "src_hostname" (the script's host).

    Returns:
        (True, reason) when both hosts are known and differ, otherwise
        (False, None). Missing, empty, or non-string hosts never match.
    """
    base_host = facts.get("base_hostname") or ""
    src_host = facts.get("src_hostname") or ""
    if not isinstance(base_host, str) or not isinstance(src_host, str):
        return False, None

    # Hostnames are case-insensitive; compare normalised values so that
    # 'Example.com' and 'example.com' are not reported as different hosts.
    base_host = base_host.strip().lower()
    src_host = src_host.strip().lower()
    if base_host and src_host and base_host != src_host:
        return True, f"Third-party script host: {src_host}"
    return False, None
# ---------------- Form rules ----------------
def form_action_missing(facts: Dict[str, Any]):
    """Flags <form> elements with no meaningful action attribute.

    Args:
        facts: Form facts; only the "action" key is read.

    Returns:
        (True, reason) when the action is absent/empty or one of the known
        no-op values in _NOOP_ACTIONS, otherwise (False, None).
    """
    action = (facts.get("action") or "").strip()
    # The entries in _NOOP_ACTIONS are lowercase; fold case so variants such
    # as 'JavaScript:void(0)' are recognised as the same no-op.
    if action.lower() in _NOOP_ACTIONS:
        return True, "Form has no action attribute (or uses a no-op action)"
    return False, None
def form_http_on_https_page(facts: Dict[str, Any]):
    """Flags forms submitting over HTTP while the page was loaded over HTTPS.

    Args:
        facts: Form facts; reads "base_url" (the page URL) and "action".

    Returns:
        (True, reason) when the page scheme is https and the action is an
        absolute http:// URL, otherwise (False, None). Missing, non-string,
        or unparseable values never match.
    """
    base_url = facts.get("base_url") or ""
    action = facts.get("action") or ""
    if not isinstance(base_url, str) or not isinstance(action, str):
        return False, None  # malformed facts → don't flag (and don't crash)

    try:
        base_scheme = urlparse(base_url.strip()).scheme.lower()
        parsed_act = urlparse(action.strip())
    except ValueError:
        return False, None  # parsing trouble → don't flag

    # Only flag absolute http:// actions on https pages.
    # Relative or schemeless ('//host/...') actions aren't flagged here: they
    # inherit the page's scheme, so they won't be HTTP on an HTTPS page.
    if base_scheme == "https" and parsed_act.scheme.lower() == "http":
        return True, f"Submits over insecure HTTP (action={parsed_act.geturl()})"
    return False, None
def form_submits_to_different_host(facts: Dict[str, Any]):
    """Flags <form> actions that submit to a different hostname than the page.

    Args:
        facts: Form facts; reads "base_hostname" (the page's host) and "action".

    Returns:
        (True, reason) when the action names a host and it differs from the
        page host, otherwise (False, None). Empty or no-op actions, actions
        without a host, and an unknown page host never match.
    """
    page_host = (facts.get("base_hostname") or "").strip().lower()
    target = (facts.get("action") or "").strip()

    # Nothing to compare for empty or no-op actions.
    if target == "" or target in _NOOP_ACTIONS:
        return False, None

    try:
        target_host = (urlparse(target).hostname or "").lower()
    except Exception:
        return False, None

    # Only compare when the action specifies a host (absolute URL or
    # schemeless //host/path) and the page host is known.
    both_known = bool(target_host) and bool(page_host)
    if both_known and target_host != page_host:
        return True, f"Submits to a different host ({target_host} vs {page_host})"
    return False, None