feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload

- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across `form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance, then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
- `app/utils/url_tools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox

Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
2025-08-21 22:05:16 -05:00
parent f639ad0934
commit 693f7d67b9
22 changed files with 1476 additions and 256 deletions
--- a/app/blueprints/api.py
+++ b/app/blueprints/api.py
@@ -30,12 +30,14 @@ import time
 from flask import Blueprint, request, jsonify, current_app, send_file, abort
 from pathlib import Path

+from app.logging_setup import get_app_logger
 from app.utils.settings import get_settings
 from app.utils.external_fetcher import ExternalScriptFetcher
 from werkzeug.exceptions import HTTPException

 api_bp = Blueprint("api", __name__, url_prefix="/api")

+app_logger = get_app_logger()

 def _resolve_results_path(job_id: str) -> str:
    """
@@ -83,7 +85,7 @@ def _api_500(err):
    Return JSON for server errors and log the exception.
    """
    try:
-        current_app.logger.exception("API 500")
+        app_logger.exception("API 500")
    except Exception:
        pass
    return jsonify({"ok": False, "error": "internal server error"}), 500
@@ -107,7 +109,7 @@ def analyze_script():
    script_url = (script_url_raw or "").strip() if isinstance(script_url_raw, str) else ""
    
    # log this request
-    current_app.logger.info(f"Got request to analyze {script_url} via API ")
+    app_logger.info(f"Got request to analyze {script_url} via API ")

    if not job_id or not script_url:
        return jsonify({"ok": False, "error": "Missing job_id (or uuid) or url"}), 400
@@ -174,7 +176,7 @@ def analyze_script():
                findings = matched
        except Exception as exc:
            try:
-                current_app.logger.error("Rule engine error", extra={"error": str(exc)})
+                app_logger.error("Rule engine error", extra={"error": str(exc)})
            except Exception:
                pass
            findings = []
--- a/app/blueprints/ui.py
+++ b/app/blueprints/ui.py
@@ -7,10 +7,14 @@ from pathlib import Path
 from datetime import datetime
 from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort

+from app.utils.url_tools import get_url_normalizer
 from app.utils.browser import get_browser
 from app.utils.enrichment import enrich_url
 from app.utils.settings import get_settings
 from app.utils.io_helpers import get_recent_results
+from app.logging_setup import get_app_logger
+
+app_logger = get_app_logger()

 bp = Blueprint("main", __name__)

@@ -18,6 +22,47 @@ settings = get_settings()
 app_name = settings.app.name
 app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"

+
+# --- data cleaner for tls to ensure data is standardized
+def normalize_ssl_tls_for_view(ssl_tls):
+    """
+    Normalize/guard the ssl_tls structure for template rendering.
+    Adds missing keys so Jinja doesn't need defensive checks everywhere.
+    """
+    safe = {"crtsh": None, "probe": None, "error": None, "skipped": False, "reason": None}
+
+    if not isinstance(ssl_tls, dict):
+        safe["error"] = "ssl_tls is not a dict"
+        return safe
+
+    safe.update(ssl_tls)
+
+    if safe.get("skipped") is True:
+        return safe  # don’t force probe/crtsh keys when skipped
+
+    # Probe guards
+    probe = safe.get("probe") or {}
+    if "results_by_version" not in probe or not isinstance(probe["results_by_version"], dict):
+        probe["results_by_version"] = {}
+    if "weak_protocols" not in probe or not isinstance(probe["weak_protocols"], list):
+        probe["weak_protocols"] = []
+    if "weak_ciphers" not in probe or not isinstance(probe["weak_ciphers"], list):
+        probe["weak_ciphers"] = []
+    if "errors" not in probe or not isinstance(probe["errors"], list):
+        probe["errors"] = []
+    if "hostname" not in probe:
+        probe["hostname"] = None
+    if "port" not in probe:
+        probe["port"] = 443
+    safe["probe"] = probe
+
+    # crt.sh guards (we keep it mostly raw; macro only reads a few fields)
+    if "crtsh" not in safe:
+        safe["crtsh"] = None
+
+    return safe
+
+
 # --- context processor ---
@bp.context_processor
 def inject_app_info():
@@ -48,7 +93,7 @@ def index():
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Build the recent list (non-fatal if storage is empty or unreadable)
-    recent_results = get_recent_results(storage, recent_count, current_app.logger)
+    recent_results = get_recent_results(storage, recent_count, app_logger)

    # Pass to template; your index.html will hide the card if list is empty
    return render_template("index.html", recent_results=recent_results)
@@ -56,8 +101,23 @@ def index():
@bp.route("/analyze", methods=["POST"])
 def analyze():
    url = request.form.get("url", "").strip()
-    current_app.logger.info(f"[*] Analyzing {url}")
-    if not url:
+    
+    # Checkbox comes as '1' when checked, or None when not present
+    fetch_ssl = request.form.get("fetch_ssl")
+    fetch_ssl_enabled = bool(fetch_ssl == "1")
+
+    normalizer = get_url_normalizer()
+
+    try:
+        target = normalizer.normalize_for_analysis(url)
+    except ValueError:
+        app_logger.warning("Empty or invalid URL input")
+        return redirect(url_for("index"))
+
+    app_logger.info(f"[*] Analyzing URL{target}")
+    app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")
+
+    if not target:
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))
    
@@ -66,44 +126,57 @@ def analyze():

    try:
        browser = get_browser()
-        result = asyncio.run(browser.fetch_page_artifacts(url))
-        current_app.logger.info(f"[+] Analysis done for {url}")
+        result = asyncio.run(browser.fetch_page_artifacts(url,fetch_ssl_enabled=fetch_ssl_enabled))
+        app_logger.info(f"[+] Analysis done for {url}")
    except Exception as e:
        flash(f"Analysis failed: {e}", "error")
-        current_app.logger.error(f"Analysis failed for {url}: {e}")
+        app_logger.error(f"Analysis failed for {url}: {e}")
        return redirect(url_for("main.index"))

    # Add enrichment safely
    try:
        enrichment = enrich_url(url)
        result["enrichment"] = enrichment
-        current_app.logger.info(f"[+] Enrichment added for {url}")
+        app_logger.info(f"[+] Enrichment added for {url}")
    except Exception as e:
        result["enrichment"] = {}
-        current_app.logger.warning(f"[!] Enrichment failed for {url}: {e}")
+        app_logger.warning(f"[!] Enrichment failed for {url}: {e}")

    # Redirect to permalink page for this run
    return redirect(url_for("main.view_result", run_uuid=result["uuid"]))

@bp.route("/results/<run_uuid>", methods=["GET"])
 def view_result(run_uuid: str):
+    """
+    View the analysis results for a given run UUID.
+    Loads results.json from SANDBOX_STORAGE/<uuid>,
+    normalizes structures for template safety, and renders the result page.
+    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    run_dir = storage / run_uuid
    results_path = run_dir / "results.json"

+    # Ensure results exist
    if not results_path.exists():
-        current_app.logger.error(f"Results not found for UUID: {run_uuid}")
+        app_logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

+    # Load the results JSON
    with open(results_path, "r", encoding="utf-8") as f:
        result = json.load(f)

-    # Pass the UUID to the template for artifact links
+    # Add UUID so template can build artifact links
    result["uuid"] = run_uuid

+    # === Normalize SSL/TLS structure for safe rendering ===
+    if "ssl_tls" in result:
+        result["ssl_tls"] = normalize_ssl_tls_for_view(result["ssl_tls"])
+
+    # Pass the enriched result dict to the template
    return render_template("result.html", **result)

+
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
 def artifacts(run_uuid: str, filename: str):
    # Resolve SANDBOX_STORAGE from app config
@@ -115,11 +188,11 @@ def artifacts(run_uuid: str, filename: str):
    try:
        full_path.relative_to(run_dir.resolve())
    except ValueError:
-        current_app.logger.warning(f"Directory traversal attempt: {filename}")
+        app_logger.warning(f"Directory traversal attempt: {filename}")
        abort(404)

    if not full_path.exists():
-        current_app.logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
+        app_logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)