- API: add `POST /api/analyze_script` (app/blueprints/api.py)
- Fetch one external script to artifacts, run rules, return findings + snippet
- Uses new ExternalScriptFetcher (results_path aware) and job UUID
- Returns: { ok, final_url, status_code, bytes, truncated, sha256, artifact_path, findings[], snippet, snippet_len }
- TODO: document in openapi/openapi.yaml
- Fetcher: update `app/utils/external_fetch.py`
- Constructed with `results_path` (UUID dir); writes to `<results_path>/scripts/fetched/<index>.js`
- Loads settings via `get_settings()`, logs via std logging
- UI (results.html):
- Move “Analyze external script” action into **Content Snippet** column for external rows
- Clicking replaces button with `<details>` snippet, shows rule matches, and adds “open in viewer” link
- Robust fetch handler (checks JSON, shows errors); builds viewer URL from absolute artifact path
- Viewer:
- New route: `GET /view/artifact/<run_uuid>/<path:filename>` (app/blueprints/ui.py)
- New template: Monaco-based read-only code viewer (viewer.html)
- Removes SRI on loader to avoid integrity block; loads file via `raw_url` and detects language by extension
- Forms:
- Refactor `analyze_forms` to mirror scripts analysis:
- Uses rule engine (`category == "form"`) across regex/function rules
- Emits rows only when matches exist
- Includes `content_snippet`, `action`, `method`, `inputs`, `rules`
- Replace legacy plumbing (`flagged`, `flag_reasons`, `status`) in output
- Normalize form function rules to canonical returns `(bool, Optional[str])`:
- `form_action_missing`
- `form_http_on_https_page`
- `form_submits_to_different_host`
- Add minor hardening (lowercasing hosts, no-op actions, clearer reasons)
- CSS: add `.forms-table` to mirror `.scripts-table` (5 columns)
- Fixed table layout, widths per column, chip/snippet styling, responsive tweaks
- Misc:
- Fix Flask “Working outside of application context” error by no longer touching `current_app` at import time (storage logic stays inside the route functions)
- Add “View Source” link to open page source in viewer
Refs:
- Roadmap: mark “Source code viewer” done; keep TODO to add `/api/analyze_script` to OpenAPI
217 lines
8.2 KiB
Python
217 lines
8.2 KiB
Python
"""
|
||
app/rules/function_rules.py
|
||
|
||
Class-based adapters + function-based rules for SneakyScope.
|
||
|
||
Design:
|
||
- FactAdapter: converts text snippets into structured 'facts' dicts by category.
|
||
- FunctionRuleAdapter: wraps a rule function (expects dict facts) so it can be
|
||
used directly by the RuleEngine even when the engine is given strings.
|
||
|
||
Each rule returns (matched: bool, reason: Optional[str]).
|
||
If matched is True, 'reason' should explain why.
|
||
|
||
Note:
|
||
- Form rules work today with text snippets, thanks to FunctionRuleAdapter+FactAdapter.
|
||
- Script rules expect per-script dict facts (src/base_hostname/etc.). They are
|
||
registered now and will fully activate when you evaluate per-script contexts.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Dict, Optional
|
||
from urllib.parse import urlparse
|
||
|
||
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Adapters
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class FactAdapter:
    """
    Turn rule-engine input into a structured 'facts' dict.

    Function-based rules consume dicts, while the engine may hand over raw
    text/html snippets. Dict input is handed back as the very same object
    (a 'category' key is added only when it is missing); string input is
    parsed according to its category.

    The per-category parsers can be extended as rules need more fields.
    """

    def __init__(self, logger: Optional[Any] = None) -> None:
        # Optional logger; only its .warning() method is used by this class.
        self.logger = logger

    def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
        """
        Produce a facts dict from a raw snippet or an existing facts dict.

        Args:
            text_or_facts: Raw string snippet, or an already-structured dict.
            category: 'form' | 'script' | 'text' | ... (selects the parser).

        Returns:
            The input dict itself when a dict was given, otherwise a new dict
            that always carries 'category' and 'raw' keys.
        """
        # Structured input: hand it back, only filling in a missing category.
        if isinstance(text_or_facts, dict):
            if "category" not in text_or_facts:
                text_or_facts["category"] = category or ""
            return text_or_facts

        # Anything that is neither dict nor str is wrapped untouched.
        if not isinstance(text_or_facts, str):
            if self.logger:
                self.logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
            return {"category": category, "raw": text_or_facts}

        if category == "form":
            return self._adapt_form_snippet(text_or_facts)

        # Script snippets are not parsed into facts yet: script rules expect
        # per-script dicts (src/base_hostname/etc.) supplied by the caller.
        # Plain text has nothing to parse either, so both get minimal facts.
        if category in ("script", "text"):
            return {"category": category, "raw": text_or_facts}

        if self.logger:
            self.logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
        return {"category": category, "raw": text_or_facts}

    # ---- Per-category parsers ----

    def _adapt_form_snippet(self, snippet: str) -> Dict[str, Any]:
        """
        Parse the simple line-oriented form snippet format, e.g.:

            action=https://example.com/post
            method=post
            inputs=
              - name=email type=text
              - name=password type=password

        Only the 'action' and 'method' lines are extracted; every other line
        is ignored. When a key appears more than once, the last one wins.
        """
        facts: Dict[str, Any] = {"category": "form", "raw": snippet}

        for raw_line in snippet.splitlines():
            key, sep, value = raw_line.strip().partition("=")
            # A line qualifies only when it literally starts with 'action=' or 'method='.
            if sep and key in ("action", "method"):
                facts[key] = value.strip()

        # Context keys the form rules read; the caller may fill these in later.
        for context_key in ("base_url", "base_hostname"):
            facts.setdefault(context_key, "")
        return facts
|
||
|
||
|
||
class FunctionRuleAdapter:
    """
    Make a facts-based rule function callable with whatever the engine passes.

    The wrapped function expects a facts dict. This wrapper first runs the
    engine input (str or dict) through a FactAdapter for the configured
    category, then forwards the resulting facts to the function and returns
    whatever the function returns.

    Usage:
        wrapped = FunctionRuleAdapter(fn=form_action_missing, category="form", adapter=FactAdapter(app.logger))
        matched, reason = wrapped("action=https://...")  # engine-friendly
    """

    def __init__(self, fn, category: str = "", adapter: Optional[FactAdapter] = None) -> None:
        # fn: rule callable taking a facts dict.
        self.fn = fn
        # category: forwarded to the adapter to select the snippet parser.
        self.category = category
        # Fall back to a logger-less FactAdapter when none (or a falsy one) is supplied.
        self.adapter = adapter if adapter else FactAdapter()

    def __call__(self, text_or_facts: Any):
        """Adapt the input into facts and return the wrapped rule's result."""
        structured = self.adapter.adapt(text_or_facts, category=self.category)
        verdict = self.fn(structured)
        return verdict
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Function-based rules (dict 'facts' expected)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# ---------------- Script rules ----------------
|
||
|
||
def script_src_uses_data_or_blob(facts: Dict[str, Any]):
    """
    Flag <script> tags whose src is a 'data:' or 'blob:' URL.

    Args:
        facts: Per-script facts dict; only the 'src' key is read.

    Returns:
        (True, reason) when src uses the data or blob scheme, else (False, None).
        A missing, empty, or non-string src never matches.
    """
    src = facts.get("src") or ""
    if not isinstance(src, str):
        return False, None

    # URL schemes are case-insensitive and surrounding whitespace in an
    # attribute value is not significant, so normalise before comparing;
    # otherwise 'DATA:...' or ' blob:...' would slip past the rule.
    scheme, colon, _rest = src.strip().partition(":")
    scheme = scheme.lower()
    if colon and scheme in ("data", "blob"):
        return True, f"Script src uses {scheme}: URL"
    return False, None
|
||
|
||
|
||
def script_src_has_dangerous_extension(facts: Dict[str, Any]):
    """
    Flag <script> tags whose src points at a dangerous file type (.vbs, .hta).

    Args:
        facts: Per-script facts dict; only the 'src' key is read.

    Returns:
        (True, reason) naming the matched extension, else (False, None).
        A missing, empty, or non-string src never matches.
    """
    src = facts.get("src") or ""
    if not isinstance(src, str):
        return False, None

    # Compare against the resource part only: a trailing '?query' or
    # '#fragment' must not hide the extension (e.g. 'evil.vbs?v=1').
    # The fragment is cut first because it may itself contain a '?'.
    resource = src.strip().lower().split("#", 1)[0].split("?", 1)[0]

    for ext in (".vbs", ".hta"):
        if resource.endswith(ext):
            return True, f"External script has dangerous extension ({ext})"
    return False, None
|
||
|
||
|
||
def script_third_party_host(facts: Dict[str, Any]):
    """
    Flag scripts loaded from a different hostname than the page.

    Args:
        facts: Per-script facts dict; reads 'base_hostname' (the page's host)
            and 'src_hostname' (the script's host).

    Returns:
        (True, reason) when both hosts are known and differ, else (False, None).
        The rule stays silent when either host is missing or not a string.
    """
    base_host = facts.get("base_hostname") or ""
    src_host = facts.get("src_hostname") or ""
    if not (isinstance(base_host, str) and isinstance(src_host, str)):
        return False, None

    # Hostnames are case-insensitive; normalise both sides so that
    # 'Example.COM' vs 'example.com' is not reported as third-party.
    base_host = base_host.strip().lower()
    src_host = src_host.strip().lower()

    if base_host and src_host and base_host != src_host:
        return True, f"Third-party script host: {src_host}"
    return False, None
|
||
|
||
|
||
# ---------------- Form rules ----------------
|
||
|
||
def form_action_missing(facts: Dict[str, Any]):
    """
    Flag <form> elements with no meaningful action attribute.

    Args:
        facts: Form facts dict; only the 'action' key is read.

    Returns:
        (True, reason) when the action is absent, empty, or one of the
        no-op values in _NOOP_ACTIONS; otherwise (False, None).
    """
    action = (facts.get("action") or "").strip()

    # Compare case-insensitively and without embedded whitespace so that
    # variants such as 'JavaScript:void(0)' or 'javascript: void(0);' are
    # recognised as the same no-op actions listed in _NOOP_ACTIONS.
    normalized = "".join(action.split()).lower()

    if normalized in _NOOP_ACTIONS:
        return True, "Form has no action attribute (or uses a no-op action)"
    return False, None
|
||
|
||
|
||
def form_http_on_https_page(facts: Dict[str, Any]):
    """
    Flag forms that submit over HTTP although the page was loaded over HTTPS.

    Args:
        facts: Form facts dict; reads 'base_url' (the page URL) and 'action'.

    Returns:
        (True, reason) for an absolute http:// action on an https page,
        otherwise (False, None). URLs that fail to parse are never flagged.
    """
    page_url = (facts.get("base_url") or "").strip()
    target = (facts.get("action") or "").strip()

    try:
        page_scheme = (urlparse(page_url).scheme or "").lower()
        target_parts = urlparse(target)
        target_scheme = (target_parts.scheme or "").lower()
    except Exception:
        # Parsing trouble: stay silent rather than flag.
        return False, None

    # Only an explicit http scheme counts. Relative or scheme-less
    # ('//host/...') actions carry no scheme of their own and are left alone.
    is_downgrade = page_scheme == "https" and target_scheme == "http"
    if not is_downgrade:
        return False, None
    return True, f"Submits over insecure HTTP (action={target_parts.geturl()})"
|
||
|
||
|
||
def form_submits_to_different_host(facts: Dict[str, Any]):
    """
    Flag <form> actions that post to a hostname other than the page's own.

    Args:
        facts: Form facts dict; reads 'base_hostname' (the page's host) and
            'action'.

    Returns:
        (True, reason) when the action names a host that differs from the
        page host, otherwise (False, None). Empty or no-op actions, actions
        without a host, and unparsable actions are never flagged.
    """
    page_host = (facts.get("base_hostname") or "").strip().lower()
    target = (facts.get("action") or "").strip()

    # Nothing to compare for missing or no-op actions.
    if (not target) or (target in _NOOP_ACTIONS):
        return False, None

    try:
        target_parts = urlparse(target)
        target_host = (target_parts.hostname or "").lower()
    except Exception:
        return False, None

    # A host is only present for absolute URLs or scheme-less '//host/path'
    # actions; relative actions yield an empty host and are skipped.
    both_known = bool(target_host) and bool(page_host)
    if both_known and target_host != page_host:
        return True, f"Submits to a different host ({target_host} vs {page_host})"
    return False, None