feat: on-demand external script analysis + code viewer; refactor form analysis to rule engine
- API: add `POST /api/analyze_script` (app/blueprints/api.py)
- Fetch one external script to artifacts, run rules, return findings + snippet
- Uses new ExternalScriptFetcher (results_path aware) and job UUID
- Returns: { ok, final_url, status_code, bytes, truncated, sha256, artifact_path, findings[], snippet, snippet_len }
- TODO: document in openapi/openapi.yaml
- Fetcher: update `app/utils/external_fetch.py`
- Constructed with `results_path` (UUID dir); writes to `<results_path>/scripts/fetched/<index>.js`
- Loads settings via `get_settings()`, logs via std logging
- UI (results.html):
- Move “Analyze external script” action into **Content Snippet** column for external rows
- Clicking replaces button with `<details>` snippet, shows rule matches, and adds “open in viewer” link
- Robust fetch handler (checks JSON, shows errors); builds viewer URL from absolute artifact path
- Viewer:
- New route: `GET /view/artifact/<run_uuid>/<path:filename>` (app/blueprints/ui.py)
- New template: Monaco-based read-only code viewer (viewer.html)
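- Removes the SRI (Subresource Integrity) attribute from the Monaco loader script tag so the browser does not block it on an integrity mismatch; loads the file via `raw_url` and detects the language from the file extension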
- Forms:
- Refactor `analyze_forms` to mirror scripts analysis:
- Uses rule engine (`category == "form"`) across regex/function rules
- Emits rows only when matches exist
- Includes `content_snippet`, `action`, `method`, `inputs`, `rules`
- Replace legacy plumbing (`flagged`, `flag_reasons`, `status`) in output
- Normalize form function rules to canonical returns `(bool, Optional[str])`:
- `form_action_missing`
- `form_http_on_https_page`
- `form_submits_to_different_host`
- Add minor hardening (lowercasing hosts, no-op actions, clearer reasons)
- CSS: add `.forms-table` to mirror `.scripts-table` (5 columns)
- Fixed table layout, widths per column, chip/snippet styling, responsive tweaks
- Misc:
- Fix Flask “Working outside of application context” error by not accessing `current_app` at import time (storage logic stays inside the route functions)
- Add “View Source” link to open page source in viewer
Refs:
- Roadmap: mark “Source code viewer” done; keep TODO to add `/api/analyze_script` to OpenAPI
This commit is contained in:
@@ -22,6 +22,7 @@ from __future__ import annotations
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Form "action" values treated as having no real target: the empty string,
# a bare "#" fragment, and the javascript:void(0) placeholder (with or
# without a trailing semicolon). Membership is an exact, case-sensitive match.
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapters
|
||||
@@ -169,35 +170,48 @@ def script_third_party_host(facts: Dict[str, Any]):
|
||||
|
||||
# ---------------- Form rules ----------------
|
||||
|
||||
def form_submits_to_different_host(facts: Dict[str, Any]):
    """Flag <form> actions that submit to a different hostname than the page.

    Args:
        facts: Rule facts; reads ``base_hostname`` (the page's host) and
            ``action`` (the form's action attribute).

    Returns:
        ``(True, reason)`` when the action URL names a host that differs from
        the page host, otherwise ``(False, None)``. Every path returns a
        2-tuple so the result can always be unpacked.
    """
    base_host = (facts.get("base_hostname") or "").strip().lower()
    action = (facts.get("action") or "").strip()

    try:
        # urlparse lowercases the hostname; relative actions yield None.
        action_host = (urlparse(action).hostname or "").lower()
    except Exception:
        # Parsing failed; treat as no match rather than erroring out.
        return False, None

    if action_host and base_host and action_host != base_host:
        return True, "Form submits to a different host"
    return False, None
|
||||
def form_action_missing(facts: Dict[str, Any]):
    """Flag <form> elements whose action attribute is absent or a no-op.

    Args:
        facts: Rule facts; reads ``action``.

    Returns:
        ``(True, reason)`` when the stripped action is one of the values in
        ``_NOOP_ACTIONS``, otherwise ``(False, None)``.
    """
    raw_action = facts.get("action") or ""
    is_noop = raw_action.strip() in _NOOP_ACTIONS
    if not is_noop:
        return False, None
    return True, "Form has no action attribute (or uses a no-op action)"
|
||||
|
||||
|
||||
def form_http_on_https_page(facts: Dict[str, Any]):
    """Flag forms submitting over HTTP while the page was loaded over HTTPS.

    Args:
        facts: Rule facts; reads ``base_url`` (the page URL) and ``action``
            (the form's action attribute).

    Returns:
        ``(True, reason)`` when the page scheme is ``https`` and the action is
        an absolute ``http`` URL, otherwise ``(False, None)``.
    """
    base_url = (facts.get("base_url") or "").strip()
    action = (facts.get("action") or "").strip()

    try:
        base_scheme = (urlparse(base_url).scheme or "").lower()
        parsed_act = urlparse(action)
        act_scheme = (parsed_act.scheme or "").lower()
    except Exception:
        return False, None  # parsing trouble -> don't flag

    # Only flag absolute http:// actions on https pages.
    # Relative or schemeless ('//host/...') actions have an empty scheme here,
    # so they are not flagged by this rule.
    if base_scheme == "https" and act_scheme == "http":
        return True, f"Submits over insecure HTTP (action={parsed_act.geturl()})"
    return False, None
|
||||
|
||||
|
||||
def form_action_missing(facts: Dict[str, Any]):
    """Flag <form> elements with no meaningful action attribute.

    Kept in agreement with the no-op aware definition of the same rule in
    this file, so whichever definition is bound last behaves the same.

    Args:
        facts: Rule facts; reads ``action``.

    Returns:
        ``(True, reason)`` when the stripped action is empty or listed in
        ``_NOOP_ACTIONS``, otherwise ``(False, None)``.
    """
    action = (facts.get("action") or "").strip()
    if not action or action in _NOOP_ACTIONS:
        return True, "Form has no action attribute (or uses a no-op action)"
    return False, None
|
||||
def form_submits_to_different_host(facts: Dict[str, Any]):
    """Flag <form> actions whose host differs from the page's host.

    Args:
        facts: Rule facts; reads ``base_hostname`` and ``action``.

    Returns:
        ``(True, reason)`` when the action URL carries a hostname that is not
        equal (case-insensitively) to ``base_hostname``; ``(False, None)``
        for empty or no-op actions, unparseable actions, host-less actions,
        or a missing page host.
    """
    target = (facts.get("action") or "").strip()
    base_host = (facts.get("base_hostname") or "").strip().lower()

    # Empty and no-op actions carry no host to compare.
    if not target or target in _NOOP_ACTIONS:
        return False, None

    try:
        act_host = (urlparse(target).hostname or "").lower()
    except Exception:
        return False, None

    # A comparison is only meaningful when both sides name a host
    # (absolute URL or schemeless //host/path on the action side).
    both_known = bool(act_host) and bool(base_host)
    if both_known and act_host != base_host:
        return True, f"Submits to a different host ({act_host} vs {base_host})"
    return False, None
|
||||
Reference in New Issue
Block a user