Files
SneakyScope/app/blueprints/main.py
Phillip Tarrant 55cd81aec0 feat(text): add text analysis pipeline & surface results in UI
- engine: add analyse_text() to extract visible page text and evaluate
  category="text" rules; collect matched phrases and expose as
  `content_snippet` (deduped, length-capped via settings.ui.snippet_preview_len).
- engine: removed unused code
- browser: removed double call for enrichment
- engine: improve regex compilation — honor per-rule flags (string or list)
  and default IGNORECASE when category=="text".
- engine: add dispatch logging "[engine] applying categories: …" gated by
  settings.app.print_rule_dispatch.
- ui(templates): add `templates/partials/result_text.html` mirroring the forms
  table; renders page-level records and their matched rules.
- ui(controller): wire `analyse_text()` into scan path and expose
  `payload["suspicious_text"]`.
- rules(text): add `identity_verification_prompt`, `gated_document_access`,
  `email_collection_prompt`; broaden `credential_reset`.

fix: text indicators were not displayed due to missing analyzer and mismatched result shape.

Result shape:
  suspicious_text: [
    {
      "type": "page",
      "content_snippet": "...matched phrases…",
      "rules": [
        {"name": "...", "description": "...", "severity": "medium", "tags": ["..."]}
      ]
    }
  ]
2025-08-22 17:18:50 -05:00

201 lines
6.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# app/blueprints/main.py
import os
import json
import asyncio
from pathlib import Path
from datetime import datetime
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
from app.utils.url_tools import get_url_normalizer
from app.utils.browser import get_browser
from app.utils.enrichment import enrich_url
from app.utils.settings import get_settings
from app.utils.io_helpers import get_recent_results
from app.logging_setup import get_app_logger
# Module-wide logger used by every route below.
app_logger = get_app_logger()
# Blueprint is registered under the name "main", so its endpoints are
# addressed as "main.<function_name>" when calling url_for().
bp = Blueprint("main", __name__)
# Settings are loaded once, at import time, and reused by the routes.
settings = get_settings()
# Display strings derived from settings; exposed to templates by inject_app_info().
app_name = settings.app.name
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
# --- data cleaner: standardize the ssl_tls structure before rendering ---
def normalize_ssl_tls_for_view(ssl_tls):
    """
    Normalize/guard the ssl_tls structure for template rendering.

    Adds missing keys so Jinja doesn't need defensive checks everywhere.

    Args:
        ssl_tls: The raw ``ssl_tls`` value loaded from a results file. Anything
            that is not a dict is treated as invalid.

    Returns:
        A new dict that always contains the keys ``crtsh``, ``probe``,
        ``error``, ``skipped`` and ``reason``. Unless the input is invalid or
        marked as skipped, ``probe`` is a dict guaranteed to contain
        ``results_by_version`` (dict), ``weak_protocols``, ``weak_ciphers``,
        ``errors`` (lists), ``hostname`` and ``port`` (default 443).

    The input is never modified: the nested ``probe`` dict is copied before
    defaults are filled in.
    """
    safe = {"crtsh": None, "probe": None, "error": None, "skipped": False, "reason": None}

    if not isinstance(ssl_tls, dict):
        safe["error"] = "ssl_tls is not a dict"
        return safe

    safe.update(ssl_tls)

    if safe.get("skipped") is True:
        return safe  # don't force probe/crtsh keys when skipped

    # Probe guards. Copy the probe so the caller's data is left untouched, and
    # fall back to an empty dict when the stored value is missing or malformed
    # (e.g. a string), which would otherwise fail on item assignment.
    raw_probe = safe.get("probe")
    probe = dict(raw_probe) if isinstance(raw_probe, dict) else {}

    # Each of these keys must exist AND have the container type the template
    # iterates over; anything else is replaced with an empty container.
    container_defaults = (
        ("results_by_version", dict),
        ("weak_protocols", list),
        ("weak_ciphers", list),
        ("errors", list),
    )
    for key, expected_type in container_defaults:
        if not isinstance(probe.get(key), expected_type):
            probe[key] = expected_type()

    probe.setdefault("hostname", None)
    probe.setdefault("port", 443)
    safe["probe"] = probe

    # "crtsh" is always present (seeded above), so it is passed through as-is.
    return safe
# --- context processor ---
@bp.context_processor
def inject_app_info():
    """Expose the app name, version string and current year to every template."""
    year_now = datetime.now().strftime("%Y")
    return dict(
        app_name=app_name,
        app_version=app_version,
        current_year=year_now,
    )
@bp.route("/", methods=["GET"])
def index():
    """
    Render the landing page, including a list of recent runs.

    How many runs are listed comes from settings.cache.recent_runs_count.
    Negative values are clamped to 0; a missing or non-numeric value falls
    back to 10.
    """
    # Any failure reading or converting the setting means "use the default".
    try:
        configured = getattr(settings.cache, "recent_runs_count", 10)
        recent_count = max(0, int(configured))
    except Exception:
        recent_count = 10

    # Storage root comes from the Flask app config.
    storage_root = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # The template receives the list under "recent_results".
    return render_template(
        "index.html",
        recent_results=get_recent_results(storage_root, recent_count, app_logger),
    )
@bp.route("/analyze", methods=["POST"])
def analyze():
    """
    Handle the analysis form: normalize the URL, run the browser fetch, redirect.

    Form fields:
        url: The URL to analyze (surrounding whitespace is stripped).
        fetch_ssl: Checkbox; the value "1" enables SSL/TLS checks.

    Returns:
        A redirect to the permalink page of the finished run, or a redirect
        back to the landing page (with a flashed error) when the URL is
        missing/invalid or the analysis raises.
    """
    url = request.form.get("url", "").strip()

    # Checkbox comes as '1' when checked, or None when not present
    fetch_ssl_enabled = request.form.get("fetch_ssl") == "1"

    normalizer = get_url_normalizer()
    try:
        target = normalizer.normalize_for_analysis(url)
    except ValueError:
        app_logger.warning("Empty or invalid URL input")
        flash("Please enter a URL.", "error")
        # The endpoint lives on the "main" blueprint; a bare "index" cannot be
        # built by url_for and would turn this error path into a server error.
        return redirect(url_for("main.index"))

    # Validate before logging so an empty target is never reported as analyzed.
    if not target:
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))

    app_logger.info(f"[*] Analyzing URL {target}")
    app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")

    # Make sure the storage root exists before the browser writes artifacts.
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    storage.mkdir(parents=True, exist_ok=True)

    try:
        browser = get_browser()
        # Fetch the normalized target rather than the raw form input.
        # NOTE(review): assumes normalize_for_analysis() returns the URL string
        # the browser should load — confirm against app.utils.url_tools.
        result = asyncio.run(
            browser.fetch_page_artifacts(target, fetch_ssl_enabled=fetch_ssl_enabled)
        )
        app_logger.info(f"[+] Analysis done for {target}")
    except Exception as e:
        flash(f"Analysis failed: {e}", "error")
        app_logger.error(f"Analysis failed for {target}: {e}")
        return redirect(url_for("main.index"))

    # Redirect to permalink page for this run
    return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
    """
    View the analysis results for a given run UUID.

    Loads results.json from SANDBOX_STORAGE/<uuid>, normalizes structures for
    template safety, and renders the result page.

    Args:
        run_uuid: Run identifier taken from the URL. It is untrusted input and
            is only accepted when it resolves to a location below the storage
            root.

    Returns:
        The rendered ``result.html`` page. Aborts with 404 when the run
        directory falls outside storage or results.json is not a file.
    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Resolve the run directory and make sure a value such as ".." cannot
    # walk out of the storage root.
    run_dir = (storage / run_uuid).resolve()
    if storage not in run_dir.parents:
        app_logger.warning(f"Rejected run UUID outside storage: {run_uuid}")
        abort(404)

    results_path = run_dir / "results.json"

    # Ensure results exist (and are a regular file, not a directory)
    if not results_path.is_file():
        app_logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

    # Load the results JSON
    with open(results_path, "r", encoding="utf-8") as f:
        result = json.load(f)

    # Add UUID so template can build artifact links
    result["uuid"] = run_uuid

    # === Normalize SSL/TLS structure for safe rendering ===
    if "ssl_tls" in result:
        result["ssl_tls"] = normalize_ssl_tls_for_view(result["ssl_tls"])

    # Pass the enriched result dict to the template
    return render_template("result.html", **result)
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
    """
    Serve a single artifact file belonging to a run.

    Args:
        run_uuid: Run identifier taken from the URL (untrusted).
        filename: Artifact file name taken from the URL (untrusted).

    Returns:
        The file sent via ``send_file``. Aborts with 404 when the resolved
        path escapes the run directory / storage root or is not a regular file.
    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Resolve both path components so ".." segments and symlinks are collapsed
    # BEFORE the containment check; comparing unresolved paths is only a
    # lexical test and lets "run_dir/.." through.
    run_dir = (storage / run_uuid).resolve()
    full_path = (run_dir / filename).resolve()

    # Prevent directory traversal: the run must live under storage and the
    # artifact must live under that run.
    if storage not in run_dir.parents or run_dir not in full_path.parents:
        app_logger.warning(f"Directory traversal attempt: {filename}")
        abort(404)

    # Only regular files can be sent; a directory would make send_file fail.
    if not full_path.is_file():
        app_logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)
@bp.get("/view/artifact/<run_uuid>/<filename>")
def view_artifact(run_uuid, filename):
    """Render the viewer page for one artifact, pointing it at the raw-file URL."""
    template_context = {
        "filename": filename,
        # URL of the 'api.get_artifact_raw' endpoint for this run/file.
        "raw_url": url_for("api.get_artifact_raw", run_uuid=run_uuid, filename=filename),
        # No language is derived server-side; the template receives None.
        "language": None,
    }
    return render_template("viewer.html", **template_context)