Files
Phillip Tarrant 693f7d67b9 feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
  - crt.sh lookup with expired-filtering and root-domain wildcard resolution
  - live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data

- Introduce `URLNormalizer` with punycode support and typo repair
  - Auto-prepend `https://` for bare domains (e.g., `google.com`)
  - Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
  - `get_url_normalizer()` reads defaults from Settings (if present)

- Standardize function-rule return shape to `(bool, dict|None)` across
  `form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
  - Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
  - Adapt non-dict inputs to facts (category-aware and via provided adapter)
  - Return `(True, dict)` on match, `(False, None)` on miss
  - Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
  - Back rules by private `self._rules`; `rules` property returns copy
  - Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
  - Fix AttributeError from property assignment during `__init__`

- Replace hidden singleton factory with explicit builder + global state:
  - `app/rules/factory.py::build_rules_engine()` builds and logs totals
  - `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
  - `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts

- Unify logging wiring:
  - `wire_logging_once(app)` clears and attaches a single handler chain
  - Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
  - Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)

- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)

- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section

- Consumers should import the rules engine via:
  - `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
  then publish with `set_rules_engine()`. Do not call old singleton factories.

- New/changed modules (high level):
  - `app/utils/url_tools.py` (+) — URLNormalizer + `get_url_normalizer()`
  - `app/rules/function_rules.py` (±) — normalized payload returns
  - `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
  - `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
  - `app/rules/factory.py` (±) — pure builder; totals logged post-registration
  - `app/state.py` (+) — process-global rules engine
  - `app/logging_setup.py` (±) — single chain, two named loggers
  - `app/wsgi.py` (±) — preload build + `set_rules_engine()`
  - `entrypoint.sh` (±) — add `--preload`
  - templates (±) — TLS card, raw toggle; front-page checkbox

Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
2025-08-21 22:05:16 -05:00

209 lines
7.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# app/blueprints/ui.py
import os
import json
import asyncio
from pathlib import Path
from datetime import datetime
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
from app.utils.url_tools import get_url_normalizer
from app.utils.browser import get_browser
from app.utils.enrichment import enrich_url
from app.utils.settings import get_settings
from app.utils.io_helpers import get_recent_results
from app.logging_setup import get_app_logger
# Logger obtained from app.logging_setup; used by every view in this module.
app_logger = get_app_logger()
# Blueprint is named "main", so its endpoints are addressed as "main.<view_name>".
bp = Blueprint("main", __name__)
# Settings are read once at import time and reused by the views below.
settings = get_settings()
# Display values handed to templates by the context processor.
app_name = settings.app.name
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
# --- data cleaner for tls to ensure data is standardized
def normalize_ssl_tls_for_view(ssl_tls):
    """
    Normalize/guard the ssl_tls structure for template rendering.

    Adds missing keys so Jinja doesn't need defensive checks everywhere.
    The input mapping (including its nested ``probe`` dict) is never mutated.

    Args:
        ssl_tls: The raw ``ssl_tls`` value from a results payload. Expected to
            be a dict; anything else yields a result with ``error`` set.

    Returns:
        dict: Always contains ``crtsh``, ``probe``, ``error``, ``skipped`` and
        ``reason``. Unless the entry is marked ``skipped``, ``probe`` is a dict
        with ``results_by_version`` (dict), ``weak_protocols``,
        ``weak_ciphers`` and ``errors`` (lists), ``hostname`` and ``port``
        (defaulting to ``None`` and ``443``).
    """
    safe = {"crtsh": None, "probe": None, "error": None, "skipped": False, "reason": None}
    if not isinstance(ssl_tls, dict):
        safe["error"] = "ssl_tls is not a dict"
        return safe

    safe.update(ssl_tls)
    if safe.get("skipped") is True:
        return safe  # don't force probe/crtsh keys when skipped

    # Work on a copy so the caller's nested dict is left untouched, and fall
    # back to an empty dict when the stored probe is missing or malformed
    # (e.g. a string), which would otherwise fail on item assignment.
    raw_probe = safe.get("probe")
    probe = dict(raw_probe) if isinstance(raw_probe, dict) else {}

    # Container fields: replace anything missing or of the wrong type.
    container_defaults = (
        ("results_by_version", dict),
        ("weak_protocols", list),
        ("weak_ciphers", list),
        ("errors", list),
    )
    for key, expected_type in container_defaults:
        if not isinstance(probe.get(key), expected_type):
            probe[key] = expected_type()

    # Scalar fields: only fill in when absent.
    probe.setdefault("hostname", None)
    probe.setdefault("port", 443)
    safe["probe"] = probe

    # "crtsh" is always present (seeded above) and is passed through mostly raw.
    return safe
# --- context processor ---
@bp.context_processor
def inject_app_info():
    """Provide the app name and version string to the template context."""
    template_globals = dict(app_name=app_name, app_version=app_version)
    return template_globals
@bp.route("/", methods=["GET"])
def index():
    """
    Render the landing page, including the list of recent runs.

    How many runs are listed comes from settings.cache.recent_runs_count.
    Negative values are clamped to 0; a missing or non-integer value falls
    back to 10.
    """
    default_count = 10
    try:
        configured = getattr(settings.cache, "recent_runs_count", default_count)
        recent_count = max(int(configured), 0)
    except Exception:
        recent_count = default_count

    # Storage root comes from the Flask app config.
    storage_root = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    return render_template(
        "index.html",
        recent_results=get_recent_results(storage_root, recent_count, app_logger),
    )
@bp.route("/analyze", methods=["POST"])
def analyze():
    """
    Handle the analysis form: normalize the URL, run the browser fetch,
    attach enrichment, and redirect to the permalink page for the run.

    Form fields:
        url: The URL (or bare domain) to analyze.
        fetch_ssl: Checkbox; '1' when checked, absent otherwise.

    Returns:
        A redirect to ``main.view_result`` on success, or back to
        ``main.index`` (with a flashed error) on invalid input or failure.
    """
    url = request.form.get("url", "").strip()

    # Checkbox comes as '1' when checked, or None when not present
    fetch_ssl_enabled = request.form.get("fetch_ssl") == "1"

    normalizer = get_url_normalizer()
    try:
        target = normalizer.normalize_for_analysis(url)
    except ValueError:
        app_logger.warning("Empty or invalid URL input")
        flash("Please enter a valid URL.", "error")
        # The blueprint is named "main", so the endpoint is "main.index";
        # a bare "index" would not resolve.
        return redirect(url_for("main.index"))

    if not target:
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))

    app_logger.info(f"[*] Analyzing URL {target}")
    app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    storage.mkdir(parents=True, exist_ok=True)

    # Everything downstream uses the normalized target, not the raw form
    # value, so bare domains / repaired URLs are what actually get analyzed.
    try:
        browser = get_browser()
        result = asyncio.run(
            browser.fetch_page_artifacts(target, fetch_ssl_enabled=fetch_ssl_enabled)
        )
        app_logger.info(f"[+] Analysis done for {target}")
    except Exception as e:
        flash(f"Analysis failed: {e}", "error")
        app_logger.error(f"Analysis failed for {target}: {e}")
        return redirect(url_for("main.index"))

    # Add enrichment safely (best-effort: a failure must not fail the run).
    # NOTE(review): this only updates the in-memory result; view_result
    # reloads results.json from disk, so confirm enrichment is persisted
    # elsewhere if it is meant to appear on the result page.
    try:
        result["enrichment"] = enrich_url(target)
        app_logger.info(f"[+] Enrichment added for {target}")
    except Exception as e:
        result["enrichment"] = {}
        app_logger.warning(f"[!] Enrichment failed for {target}: {e}")

    # Redirect to permalink page for this run
    return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
    """
    View the analysis results for a given run UUID.

    Loads results.json from SANDBOX_STORAGE/<uuid>, normalizes structures
    for template safety, and renders the result page.

    Args:
        run_uuid: Run identifier taken from the URL (untrusted input).

    Returns:
        The rendered ``result.html`` page. Aborts with 404 when the run
        directory falls outside the storage root or results.json is missing.
    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # run_uuid comes straight from the URL: resolve it and require the run
    # directory to sit strictly inside the storage root, so values such as
    # ".." cannot read files from outside it.
    run_dir = (storage / run_uuid).resolve()
    if storage not in run_dir.parents:
        app_logger.warning(f"Directory traversal attempt: {run_uuid}")
        abort(404)

    results_path = run_dir / "results.json"

    # Ensure results exist (and are a regular file, not a directory)
    if not results_path.is_file():
        app_logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

    # Load the results JSON
    with open(results_path, "r", encoding="utf-8") as f:
        result = json.load(f)

    # Add UUID so template can build artifact links
    result["uuid"] = run_uuid

    # Normalize SSL/TLS structure for safe rendering
    if "ssl_tls" in result:
        result["ssl_tls"] = normalize_ssl_tls_for_view(result["ssl_tls"])

    # Pass the result dict to the template as keyword context
    return render_template("result.html", **result)
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
    """
    Serve a single artifact file belonging to a run.

    Args:
        run_uuid: Run identifier taken from the URL (untrusted input).
        filename: Artifact file name taken from the URL (untrusted input).

    Returns:
        The file response from ``send_file``. Aborts with 404 when the
        resolved path escapes the run directory / storage root or the file
        does not exist.
    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Prevent directory traversal. Both paths are resolved first because a
    # lexical relative_to() check on an unresolved path accepts ".."
    # components; the run directory must be strictly inside storage and the
    # file strictly inside the run directory.
    run_dir = (storage / run_uuid).resolve()
    full_path = (run_dir / filename).resolve()
    if storage not in run_dir.parents or run_dir not in full_path.parents:
        app_logger.warning(f"Directory traversal attempt: {filename}")
        abort(404)

    # Only regular files can be sent; a directory here would make send_file fail.
    if not full_path.is_file():
        app_logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)
@bp.get("/view/artifact/<run_uuid>/<filename>")
def view_artifact(run_uuid, filename):
    """Render the viewer page for one artifact of a run."""
    context = {
        "filename": filename,
        # URL of the raw-artifact endpoint the viewer page loads from.
        "raw_url": url_for("api.get_artifact_raw", run_uuid=run_uuid, filename=filename),
        # Language is not derived server-side; it could be set here (e.g. 'javascript').
        "language": None,
    }
    return render_template("viewer.html", **context)