feat: on-demand external script analysis + code viewer; refactor form analysis to rule engine
- API: add `POST /api/analyze_script` (app/blueprints/api.py)
- Fetch one external script to artifacts, run rules, return findings + snippet
- Uses new ExternalScriptFetcher (results_path aware) and job UUID
- Returns: { ok, final_url, status_code, bytes, truncated, sha256, artifact_path, findings[], snippet, snippet_len }
- TODO: document in openapi/openapi.yaml
- Fetcher: update `app/utils/external_fetch.py`
- Constructed with `results_path` (UUID dir); writes to `<results_path>/scripts/fetched/<index>.js`
- Loads settings via `get_settings()`, logs via std logging
- UI (results.html):
- Move “Analyze external script” action into **Content Snippet** column for external rows
- Clicking replaces button with `<details>` snippet, shows rule matches, and adds “open in viewer” link
- Robust fetch handler (checks JSON, shows errors); builds viewer URL from absolute artifact path
- Viewer:
- New route: `GET /view/artifact/<run_uuid>/<path:filename>` (app/blueprints/ui.py)
- New template: Monaco-based read-only code viewer (viewer.html)
- Removes SRI on loader to avoid integrity block; loads file via `raw_url` and detects language by extension
- Forms:
- Refactor `analyze_forms` to mirror scripts analysis:
- Uses rule engine (`category == "form"`) across regex/function rules
- Emits rows only when matches exist
- Includes `content_snippet`, `action`, `method`, `inputs`, `rules`
- Replace legacy plumbing (`flagged`, `flag_reasons`, `status`) in output
- Normalize form function rules to canonical returns `(bool, Optional[str])`:
- `form_action_missing`
- `form_http_on_https_page`
- `form_submits_to_different_host`
- Add minor hardening (lowercasing hosts, no-op actions, clearer reasons)
- CSS: add `.forms-table` to mirror `.scripts-table` (5 columns)
- Fixed table layout, widths per column, chip/snippet styling, responsive tweaks
- Misc:
- Fix “working outside app context” issue by avoiding `current_app` at import time (left storage logic inside routes)
- Add “View Source” link to open page source in viewer
Refs:
- Roadmap: mark “Source code viewer” done; keep TODO to add `/api/analyze_script` to OpenAPI
This commit is contained in:
212
app/blueprints/api.py
Normal file
212
app/blueprints/api.py
Normal file
@@ -0,0 +1,212 @@
|
||||
# app/blueprints/api.py
|
||||
"""
|
||||
API blueprint for JSON endpoints.
|
||||
|
||||
Endpoints:
|
||||
POST /api/analyze_script
|
||||
Body:
|
||||
{
|
||||
"job_id": "<uuid>", # or "uuid": "<uuid>"
|
||||
"url": "https://cdn.example.com/app.js",
|
||||
"category": "script" # optional, defaults to "script"
|
||||
}
|
||||
Response:
|
||||
{
|
||||
"ok": true,
|
||||
"final_url": "...",
|
||||
"status_code": 200,
|
||||
"bytes": 12345,
|
||||
"truncated": false,
|
||||
"sha256": "...",
|
||||
"artifact_path": "/abs/path/to/<uuid>/scripts/fetched/<index>.js",
|
||||
"findings": [ { "name": "...", "description": "...", "severity": "...", "tags": [...], "reason": "...", "category": "..." }, ... ],
|
||||
"snippet": "<trimmed content>",
|
||||
"snippet_len": 45678 # length of the full decoded script text, not of the trimmed snippet
|
||||
}
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from flask import Blueprint, request, jsonify, current_app, send_file, abort
|
||||
from pathlib import Path
|
||||
|
||||
from app.utils.settings import get_settings
|
||||
from app.utils.external_fetcher import ExternalScriptFetcher
|
||||
from werkzeug.exceptions import HTTPException
|
||||
|
||||
# Blueprint for the JSON API; every route registered on it is served under the /api prefix.
api_bp = Blueprint("api", __name__, url_prefix="/api")
|
||||
|
||||
|
||||
def _resolve_results_path(job_id: str, base_dir: str = "/data") -> str:
    """
    Return the results directory for a job UUID, creating it if needed.

    The directory is always ``<base_dir>/<job_id>``. (An earlier revision of
    this docstring mentioned an ``artifacts/`` sub-directory, but the code never
    used one.)

    Args:
        job_id: Run identifier. It arrives from request data, so it must be a
            single, non-empty path component.
        base_dir: Root directory that holds one sub-directory per run.

    Returns:
        The path ``os.path.join(base_dir, job_id)``; the directory exists when
        this function returns.

    Raises:
        ValueError: If ``job_id`` is empty, is ``.``/``..``, or contains a path
            separator or NUL byte (any of which could escape ``base_dir``).
        OSError: If the directory cannot be created.
    """
    if not isinstance(job_id, str) or not job_id or job_id in (".", ".."):
        raise ValueError(f"Invalid job id: {job_id!r}")
    # Reject anything that could make the joined path leave base_dir.
    if any(ch in job_id for ch in ("/", "\\", "\x00")):
        raise ValueError(f"Invalid job id: {job_id!r}")

    results_path = os.path.join(base_dir, job_id)
    # exist_ok=True covers the "already there" case, so no separate isdir() branch is needed.
    os.makedirs(results_path, exist_ok=True)
    return results_path
|
||||
|
||||
|
||||
def _make_snippet(text: str, max_chars: int = 1200) -> str:
    """
    Build a short preview of the script contents.

    Surrounding whitespace is stripped first; when the stripped text is longer
    than ``max_chars`` it is cut at ``max_chars`` characters and an ellipsis
    character is appended. Empty or missing text yields an empty string.
    No escaping is performed here.
    """
    if not text:
        return ""

    trimmed = text.strip()
    if len(trimmed) <= max_chars:
        return trimmed
    return trimmed[:max_chars] + "…"
|
||||
|
||||
@api_bp.errorhandler(400)
@api_bp.errorhandler(403)
@api_bp.errorhandler(404)
@api_bp.errorhandler(405)
def _api_err(err):
    """
    Turn 400/403/404/405 errors raised inside this blueprint into a JSON body.

    For an HTTPException the response carries its status code and its
    lower-cased name (or "error" when the name is empty); anything else is
    reported as a generic 400 "error".
    """
    is_http_error = isinstance(err, HTTPException)
    status = err.code if is_http_error else 400
    label = (err.name or "error").lower() if is_http_error else "error"
    return jsonify({"ok": False, "error": label}), status
|
||||
|
||||
|
||||
@api_bp.errorhandler(500)
def _api_500(err):
    """
    Log the failure and answer with a generic JSON 500 body.

    The error details are deliberately not included in the response.
    """
    payload = {"ok": False, "error": "internal server error"}
    try:
        current_app.logger.exception("API 500")
    except Exception:
        # Logging is best-effort: a logging failure must not mask the 500 response.
        pass
    return jsonify(payload), 500
|
||||
|
||||
|
||||
def _clean_str(value) -> str:
    """Return ``value`` stripped when it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def _is_single_path_component(name: str) -> bool:
    """Return True when ``name`` cannot escape its parent directory once joined to it."""
    if name in ("", ".", ".."):
        return False
    return not any(ch in name for ch in ("/", "\\", "\x00"))


def _get_rule_engine():
    """Return the rule engine stored on the app (attribute first, then config), or None."""
    try:
        engine = getattr(current_app, "rule_engine", None)
        if engine is None:
            engine = current_app.config.get("RULE_ENGINE")
        return engine
    except Exception:
        # Best-effort lookup: a missing engine only means "no findings".
        current_app.logger.exception("Could not look up the rule engine")
        return None


def _matched_findings(engine, js_text: str, category: str) -> list:
    """Run every rule on ``js_text`` and return only the FAIL (matched) results for the UI."""
    if engine is None or not hasattr(engine, "run_all"):
        return []

    try:
        all_results = engine.run_all(js_text, category=category)
    except Exception:
        # A broken rule must not fail the whole request; keep the traceback in the log.
        current_app.logger.exception("Rule engine error")
        return []

    if not isinstance(all_results, list):
        return []

    findings = []
    for entry in all_results:
        try:
            if entry.get("result") != "FAIL":
                continue
            findings.append({
                "name": entry.get("name"),
                "description": entry.get("description"),
                "severity": entry.get("severity"),
                "tags": entry.get("tags") or [],
                "reason": entry.get("reason"),
                "category": entry.get("category"),
            })
        except (AttributeError, TypeError):
            # Ignore malformed entries (not mapping-like).
            continue
    return findings


@api_bp.post("/analyze_script")
def analyze_script():
    """
    Analyze EXACTLY one external script URL for a given job UUID.

    Expected JSON body:
      { "job_id": "<uuid>", "url": "https://cdn.example.com/app.js", "category": "script" }

    ``uuid`` is accepted as an alias for ``job_id``. ``category`` is optional and
    falls back to "script" when missing, blank, or not a string.

    Responses:
      200: fetch succeeded; body holds fetch metadata, matched rules and a snippet.
      400: missing/invalid ``job_id`` or ``url``, or the feature is disabled.
      502: the fetcher could not retrieve or save the script.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        # A JSON array/scalar (or no JSON at all) has no fields to read.
        body = {}

    job_id = _clean_str(body.get("job_id") or body.get("uuid"))
    script_url = _clean_str(body.get("url"))
    category = _clean_str(body.get("category")) or "script"

    # %r keeps control characters in the caller-supplied URL from forging log lines.
    current_app.logger.info("Got request to analyze %r via API", script_url)

    if not job_id or not script_url:
        return jsonify({"ok": False, "error": "Missing job_id (or uuid) or url"}), 400

    # job_id becomes a directory name below, so it must not be able to leave the base dir.
    if not _is_single_path_component(job_id):
        return jsonify({"ok": False, "error": "Invalid job_id"}), 400

    settings = get_settings()
    if not settings.external_fetch.enabled:
        return jsonify({"ok": False, "error": "Feature disabled"}), 400

    # Resolve the UUID-backed results directory for this run.
    results_path = _resolve_results_path(job_id)

    # NOTE(review): script_url is caller-supplied; this handler relies on
    # ExternalScriptFetcher to restrict schemes/hosts - confirm.
    fetcher = ExternalScriptFetcher(results_path=results_path)

    # Millisecond timestamp used as the saved file's index:
    # <results_path>/scripts/fetched/<index>.js
    unique_index = int(time.time() * 1000)

    outcome = fetcher.fetch_one(script_url=script_url, index=unique_index)
    if not outcome.ok or not outcome.saved_path:
        return jsonify({
            "ok": False,
            "error": outcome.reason,
            "status_code": outcome.status_code,
            "final_url": outcome.final_url,
        }), 502

    # Read bytes and decode to UTF-8 (undecodable bytes dropped) for rules and snippet.
    try:
        with open(outcome.saved_path, "rb") as fh:
            js_text = fh.read().decode("utf-8", errors="ignore")
    except OSError:
        # Still best-effort (empty text), but leave a trace instead of failing silently.
        current_app.logger.exception("Could not read fetched script %r", outcome.saved_path)
        js_text = ""

    findings = _matched_findings(_get_rule_engine(), js_text, category)
    snippet = _make_snippet(js_text, max_chars=settings.ui.snippet_preview_len)

    return jsonify({
        "ok": True,
        "final_url": outcome.final_url,
        "status_code": outcome.status_code,
        "bytes": outcome.bytes_fetched,
        "truncated": outcome.truncated,
        "sha256": outcome.sha256_hex,
        "artifact_path": outcome.saved_path,
        "findings": findings,  # only FAILed rules
        "snippet": snippet,
        "snippet_len": len(js_text),  # length of the full decoded text
    })
|
||||
|
||||
|
||||
@api_bp.get("/artifacts/<run_uuid>/<filename>")
def get_artifact_raw(run_uuid, filename):
    """
    Stream one artifact file that sits directly inside a run's results directory.

    Both URL segments are untrusted. A segment that is ``.`` or contains ``/``,
    a backslash, a NUL byte or ``..`` is rejected with 400. A path that
    resolves outside the run directory, or that is not a regular file, gives 404.
    """
    for segment in (run_uuid, filename):
        if segment == "." or any(bad in segment for bad in ("/", "\\", "\x00", "..")):
            abort(400)

    # NOTE(review): _resolve_results_path creates the directory when it is missing,
    # so a GET for an unknown run leaves an empty directory behind.
    run_dir = Path(_resolve_results_path(run_uuid)).resolve()
    full_path = (run_dir / filename).resolve()

    # Resolving first means a symlink inside the run directory cannot point the
    # response at a file outside of it.
    try:
        full_path.relative_to(run_dir)
    except ValueError:
        abort(404)

    if not full_path.is_file():
        abort(404)

    return send_file(full_path, as_attachment=False)
|
||||
135
app/blueprints/ui.py
Normal file
135
app/blueprints/ui.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# app/blueprints/ui.py
|
||||
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
|
||||
|
||||
from app.utils.browser import get_browser
|
||||
from app.utils.enrichment import enrich_url
|
||||
from app.utils.settings import get_settings
|
||||
from app.utils.io_helpers import get_recent_results
|
||||
|
||||
# Blueprint for the server-rendered HTML pages; endpoints are addressed as "main.<view>".
bp = Blueprint("main", __name__)

# Settings are read once, when this module is imported; the two values below
# are what the context processor hands to the templates.
settings = get_settings()
app_name = settings.app.name
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
|
||||
|
||||
# --- context processor ---
|
||||
@bp.context_processor
def inject_app_info():
    """Supply the application name and version string as template variables."""
    template_vars = {}
    template_vars["app_name"] = app_name
    template_vars["app_version"] = app_version
    return template_vars
|
||||
|
||||
@bp.route("/", methods=["GET"])
def index():
    """
    Render the landing page together with the list of recent runs.

    How many runs are listed comes from settings.cache.recent_runs_count:
    negative values are clamped to 0, and 10 is used when the setting is
    missing or cannot be converted to an int.
    """
    try:
        configured = getattr(settings.cache, "recent_runs_count", 10)
        recent_count = max(int(configured), 0)
    except Exception:
        recent_count = 10

    # Runs are listed from the SANDBOX_STORAGE directory configured on the app.
    storage_root = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    runs = get_recent_results(storage_root, recent_count, current_app.logger)

    return render_template("index.html", recent_results=runs)
|
||||
|
||||
@bp.route("/analyze", methods=["POST"])
def analyze():
    """
    Run a page analysis for the submitted URL and redirect to its result page.

    Reads the ``url`` form field. On a missing URL, a failed analysis, or a
    result without a run UUID, an error is flashed and the user is sent back
    to the index page. On success the user is redirected to the permalink
    ``/results/<uuid>`` for the new run.
    """
    url = request.form.get("url", "").strip()
    if not url:
        # Validate before logging so empty submissions do not produce "Analyzing" lines.
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))

    current_app.logger.info(f"[*] Analyzing {url}")

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    storage.mkdir(parents=True, exist_ok=True)

    try:
        browser = get_browser()
        result = asyncio.run(browser.fetch_page_artifacts(url))
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error() call drops.
        current_app.logger.exception(f"Analysis failed for {url}: {e}")
        flash(f"Analysis failed: {e}", "error")
        return redirect(url_for("main.index"))
    current_app.logger.info(f"[+] Analysis done for {url}")

    # The redirect below needs a run UUID; fail gracefully instead of raising KeyError/TypeError.
    run_uuid = result.get("uuid") if isinstance(result, dict) else None
    if not run_uuid:
        current_app.logger.error(f"Analysis for {url} returned no run UUID")
        flash("Analysis failed: no run identifier was produced.", "error")
        return redirect(url_for("main.index"))

    # Add enrichment safely.
    # NOTE(review): `result` is not written back to disk in this function and
    # view_result re-reads results.json, so this enrichment appears to be
    # discarded after the redirect - confirm whether it should be persisted.
    try:
        result["enrichment"] = enrich_url(url)
        current_app.logger.info(f"[+] Enrichment added for {url}")
    except Exception as e:
        result["enrichment"] = {}
        current_app.logger.warning(f"[!] Enrichment failed for {url}: {e}")

    # Redirect to permalink page for this run
    return redirect(url_for("main.view_result", run_uuid=run_uuid))
|
||||
|
||||
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
    """
    Render the stored result page for one run.

    Loads ``<SANDBOX_STORAGE>/<run_uuid>/results.json`` and passes its top-level
    keys (plus ``uuid``) to ``result.html``. Responds 404 when the UUID is not
    a plain directory name or the file is missing, and 500 when the file
    cannot be read or does not hold a JSON object.
    """
    # run_uuid is untrusted: it must name a directory directly under storage.
    if run_uuid in (".", "..") or any(ch in run_uuid for ch in ("/", "\\", "\x00")):
        current_app.logger.warning(f"Rejected run UUID: {run_uuid!r}")
        abort(404)

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    results_path = storage / run_uuid / "results.json"

    if not results_path.is_file():
        current_app.logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

    try:
        with open(results_path, "r", encoding="utf-8") as f:
            result = json.load(f)
    except (OSError, ValueError):
        # json.JSONDecodeError is a ValueError; log the cause before failing.
        current_app.logger.exception(f"Could not load results for UUID: {run_uuid}")
        abort(500)

    if not isinstance(result, dict):
        # The template is fed via **result, so anything but an object is unusable.
        current_app.logger.error(f"Results for UUID {run_uuid} are not a JSON object")
        abort(500)

    # Pass the UUID to the template for artifact links
    result["uuid"] = run_uuid

    return render_template("result.html", **result)
|
||||
|
||||
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
    """
    Send one artifact file stored directly inside a run directory.

    The file is looked up at ``<SANDBOX_STORAGE>/<run_uuid>/<filename>``.
    Responds 404 when either segment would leave the run directory or when
    the target is not a regular file.
    """
    # run_uuid is untrusted: it must name a directory directly under storage.
    if run_uuid in (".", "..") or any(ch in run_uuid for ch in ("/", "\\", "\x00")):
        current_app.logger.warning(f"Directory traversal attempt: {run_uuid!r}")
        abort(404)

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    run_dir = (storage / run_uuid).resolve()
    # Resolve BEFORE comparing: relative_to() is purely lexical, so an unresolved
    # "<run_dir>/.." would otherwise pass the containment check.
    full_path = (run_dir / filename).resolve()

    # Prevent directory traversal
    try:
        full_path.relative_to(run_dir)
    except ValueError:
        current_app.logger.warning(f"Directory traversal attempt: {filename!r}")
        abort(404)

    # is_file() (not exists()) so a directory never reaches send_file.
    if not full_path.is_file():
        current_app.logger.error(f"Artifact not found: {filename!r} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)
|
||||
|
||||
|
||||
@bp.get("/view/artifact/<run_uuid>/<filename>")
def view_artifact(run_uuid, filename):
    """
    Render the code viewer page for one artifact of a run.

    The template receives the file name, the URL of the API route that serves
    the raw file, and a language hint. The hint is always None here, so no
    language is chosen on the server side.
    """
    template_args = {
        "filename": filename,
        "raw_url": url_for('api.get_artifact_raw', run_uuid=run_uuid, filename=filename),
        "language": None,  # e.g., 'javascript'
    }
    return render_template('viewer.html', **template_args)
|
||||
|
||||
Reference in New Issue
Block a user