feat(text): add text analysis pipeline & surface results in UI
- engine: add analyse_text() to extract visible page text and evaluate
category="text" rules; collect matched phrases and expose as
`content_snippet` (deduped, length-capped via settings.ui.snippet_preview_len).
- engine: removed unused code
- browser: removed double call for enrichment
- engine: improve regex compilation — honor per-rule flags (string or list)
and default IGNORECASE when category=="text".
- engine: add dispatch logging "[engine] applying categories: …" gated by
settings.app.print_rule_dispatch.
- ui(templates): add `templates/partials/result_text.html` mirroring the forms
table; renders page-level records and their matched rules.
- ui(controller): wire `analyse_text()` into scan path and expose
`payload["suspicious_text"]`.
- rules(text): add `identity_verification_prompt`, `gated_document_access`,
`email_collection_prompt`; broaden `credential_reset`.
fix: text indicators were not displayed due to missing analyzer and mismatched result shape.
Result shape:
suspicious_text: [
{
"type": "page",
"content_snippet": "...matched phrases…",
"rules": [
{"name": "...", "description": "...", "severity": "medium", "tags": ["..."]}
]
}
]
This commit is contained in:
200
app/blueprints/main.py
Normal file
200
app/blueprints/main.py
Normal file
@@ -0,0 +1,200 @@
|
||||
# app/blueprints/main.py
|
||||
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
|
||||
|
||||
from app.utils.url_tools import get_url_normalizer
|
||||
from app.utils.browser import get_browser
|
||||
from app.utils.enrichment import enrich_url
|
||||
from app.utils.settings import get_settings
|
||||
from app.utils.io_helpers import get_recent_results
|
||||
from app.logging_setup import get_app_logger
|
||||
|
||||
# Logger used by every route in this module
app_logger = get_app_logger()

# Routes below are attached to this blueprint (endpoint prefix "main.")
bp = Blueprint("main", __name__)

# Settings are read once, when the module is imported
settings = get_settings()
app_name = settings.app.name
app_version = "v {}.{}".format(settings.app.version_major, settings.app.version_minor)
|
||||
|
||||
|
||||
# --- data cleaner for tls to ensure data is standardized
|
||||
def normalize_ssl_tls_for_view(ssl_tls):
    """
    Normalize/guard the ssl_tls structure for template rendering.

    Adds missing keys so Jinja doesn't need defensive checks everywhere.

    Args:
        ssl_tls: The raw ``ssl_tls`` value from a results payload. Anything
            that is not a dict is reported through the ``error`` key.

    Returns:
        A new dict that always contains ``crtsh``, ``probe``, ``error``,
        ``skipped`` and ``reason``. Unless ``skipped`` is True, ``probe`` is
        a dict carrying ``results_by_version`` (dict), ``weak_protocols``,
        ``weak_ciphers`` and ``errors`` (lists), ``hostname`` and ``port``
        (default 443). The input and its nested ``probe`` are not modified.
    """
    safe = {"crtsh": None, "probe": None, "error": None, "skipped": False, "reason": None}

    if not isinstance(ssl_tls, dict):
        safe["error"] = "ssl_tls is not a dict"
        return safe

    safe.update(ssl_tls)

    if safe.get("skipped") is True:
        return safe  # don't force probe/crtsh keys when skipped

    # Probe guards. Work on a copy: update() above is shallow, so writing to
    # the original probe dict would silently change the caller's data. A
    # probe that is not a dict at all is replaced by an empty one.
    raw_probe = safe.get("probe")
    probe = dict(raw_probe) if isinstance(raw_probe, dict) else {}

    # Each of these keys must exist and hold the expected container type.
    container_guards = (
        ("results_by_version", dict),
        ("weak_protocols", list),
        ("weak_ciphers", list),
        ("errors", list),
    )
    for key, expected_type in container_guards:
        if not isinstance(probe.get(key), expected_type):
            probe[key] = expected_type()

    probe.setdefault("hostname", None)
    probe.setdefault("port", 443)
    safe["probe"] = probe

    # "crtsh" is seeded in `safe` above and kept raw; no further guard needed.
    return safe
|
||||
|
||||
|
||||
# --- context processor ---
|
||||
@bp.context_processor
def inject_app_info():
    """Expose the app name, app version and current year to templates."""
    year_text = datetime.now().strftime("%Y")
    return dict(
        app_name=app_name,
        app_version=app_version,
        current_year=year_text,
    )
|
||||
|
||||
@bp.route("/", methods=["GET"])
def index():
    """
    Render the landing page together with the list of recent runs.

    How many runs are listed comes from settings.cache.recent_runs_count.
    A negative value is clamped to 0; a missing or unusable value falls
    back to 10.
    """
    try:
        configured = int(getattr(settings.cache, "recent_runs_count", 10))
    except Exception:
        # Any problem reading the setting falls back to the default
        recent_count = 10
    else:
        recent_count = max(configured, 0)

    # Storage root comes from the Flask app config
    storage_root = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # The template receives whatever the helper returns for this storage root
    recent = get_recent_results(storage_root, recent_count, app_logger)
    return render_template("index.html", recent_results=recent)
|
||||
|
||||
@bp.route("/analyze", methods=["POST"])
def analyze():
    """
    Analyze the submitted URL and redirect to the permalink of the run.

    Reads ``url`` and the ``fetch_ssl`` checkbox from the POSTed form,
    normalizes the URL, runs the browser fetch and redirects to
    ``main.view_result`` for the returned run UUID. On empty/invalid input
    or a failed fetch, an error is flashed and the user is sent back to the
    landing page.
    """
    url = request.form.get("url", "").strip()

    # Checkbox comes as '1' when checked, or None when not present
    fetch_ssl_enabled = request.form.get("fetch_ssl") == "1"

    normalizer = get_url_normalizer()

    try:
        target = normalizer.normalize_for_analysis(url)
    except ValueError:
        app_logger.warning("Empty or invalid URL input")
        flash("Please enter a valid URL.", "error")
        # Endpoints of this blueprint are namespaced; a bare "index" does
        # not exist and would raise a BuildError instead of redirecting.
        return redirect(url_for("main.index"))

    if not target:
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))

    app_logger.info(f"[*] Analyzing URL {target}")
    app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    storage.mkdir(parents=True, exist_ok=True)

    try:
        browser = get_browser()
        # Fetch the normalized target (the value that was validated and
        # logged above) rather than the raw form input.
        # NOTE(review): assumes fetch_page_artifacts accepts the normalizer's
        # output unchanged — confirm against app.utils.browser.
        result = asyncio.run(
            browser.fetch_page_artifacts(target, fetch_ssl_enabled=fetch_ssl_enabled)
        )
        app_logger.info(f"[+] Analysis done for {target}")
    except Exception as e:
        # Route-level boundary: report the failure to the user and the log
        flash(f"Analysis failed: {e}", "error")
        app_logger.error(f"Analysis failed for {target}: {e}")
        return redirect(url_for("main.index"))

    # Redirect to permalink page for this run
    return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
|
||||
|
||||
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
    """
    View the analysis results for a given run UUID.

    Loads results.json from SANDBOX_STORAGE/<uuid>,
    normalizes structures for template safety, and renders the result page.

    Aborts with 404 when the run directory would fall outside the storage
    root or results.json is missing, and with 500 when results.json cannot
    be read as a JSON object.
    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    run_dir = (storage / run_uuid).resolve()

    # run_uuid comes straight from the URL: a value such as ".." must not
    # move the lookup out of the storage root.
    if storage not in run_dir.parents:
        app_logger.warning(f"Directory traversal attempt: {run_uuid}")
        abort(404)

    results_path = run_dir / "results.json"

    # Ensure results exist
    if not results_path.is_file():
        app_logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

    # Load the results JSON
    try:
        with open(results_path, "r", encoding="utf-8") as f:
            result = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both invalid JSON and undecodable bytes
        app_logger.error(f"Could not read results for UUID {run_uuid}: {exc}")
        abort(500)

    # The template context is built from the payload, so it must be an object
    if not isinstance(result, dict):
        app_logger.error(f"Results for UUID {run_uuid} are not a JSON object")
        abort(500)

    # Add UUID so template can build artifact links
    result["uuid"] = run_uuid

    # === Normalize SSL/TLS structure for safe rendering ===
    if "ssl_tls" in result:
        result["ssl_tls"] = normalize_ssl_tls_for_view(result["ssl_tls"])

    # Pass the enriched result dict to the template
    return render_template("result.html", **result)
|
||||
|
||||
|
||||
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
    """
    Serve one stored artifact file of a run.

    Aborts with 404 when the requested path would fall outside the run's
    directory under SANDBOX_STORAGE, or when it is not an existing file.
    """
    # Resolve SANDBOX_STORAGE from app config
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Resolve both paths before comparing them. A comparison on unresolved
    # paths is purely lexical and never notices ".." segments.
    run_dir = (storage / run_uuid).resolve()
    full_path = (run_dir / filename).resolve()

    # Prevent directory traversal: the run directory must sit below the
    # storage root and the artifact must sit below the run directory.
    if storage not in run_dir.parents or run_dir not in full_path.parents:
        app_logger.warning(f"Directory traversal attempt: {filename}")
        abort(404)

    # is_file() also rejects directories, which send_file cannot serve
    if not full_path.is_file():
        app_logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)
|
||||
|
||||
|
||||
@bp.get("/view/artifact/<run_uuid>/<filename>")
def view_artifact(run_uuid, filename):
    """Render the viewer page for a single artifact of a run."""
    template_context = {
        "filename": filename,
        # URL of the endpoint the viewer loads the file content from
        "raw_url": url_for("api.get_artifact_raw", run_uuid=run_uuid, filename=filename),
        # No server-side language detection; the template receives None
        "language": None,
    }
    return render_template("viewer.html", **template_context)
|
||||
|
||||
Reference in New Issue
Block a user