feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across
`form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
- `app/utils/url_tools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox
Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
This commit is contained in:
116
app/__init__.py
116
app/__init__.py
@@ -5,26 +5,11 @@ from flask import Flask
|
|||||||
|
|
||||||
# Local imports
|
# Local imports
|
||||||
from .utils.settings import get_settings
|
from .utils.settings import get_settings
|
||||||
from .utils.rules_engine import RuleEngine, load_rules_from_yaml, Rule
|
from .logging_setup import wire_logging_once, get_app_logger, get_engine_logger
|
||||||
|
|
||||||
# our code based rules
|
|
||||||
from .rules.function_rules import (
|
|
||||||
FactAdapter,
|
|
||||||
FunctionRuleAdapter,
|
|
||||||
script_src_uses_data_or_blob,
|
|
||||||
script_src_has_dangerous_extension,
|
|
||||||
script_third_party_host,
|
|
||||||
form_submits_to_different_host,
|
|
||||||
form_http_on_https_page,
|
|
||||||
form_action_missing,
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.blueprints import ui # ui blueprint
|
from app.blueprints import ui # ui blueprint
|
||||||
from app.blueprints import api # api blueprint
|
from app.blueprints import api # api blueprint
|
||||||
|
|
||||||
# from .utils import io_helpers # if need logging/setup later
|
|
||||||
# from .utils import cache_db # available for future injections
|
|
||||||
|
|
||||||
def create_app() -> Flask:
|
def create_app() -> Flask:
|
||||||
"""
|
"""
|
||||||
Create and configure the Flask application instance.
|
Create and configure the Flask application instance.
|
||||||
@@ -35,103 +20,23 @@ def create_app() -> Flask:
|
|||||||
# Basic app object
|
# Basic app object
|
||||||
app = Flask(__name__, template_folder="templates", static_folder="static")
|
app = Flask(__name__, template_folder="templates", static_folder="static")
|
||||||
|
|
||||||
|
# logging setup
|
||||||
|
wire_logging_once(app)
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
# Load settings (safe fallback to defaults if file missing)
|
# Load settings (safe fallback to defaults if file missing)
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
|
|
||||||
# Secret key loaded from env (warn if missing)
|
# Secret key loaded from env (warn if missing)
|
||||||
app.secret_key = os.getenv("SECRET_KEY")
|
app.secret_key = os.getenv("SECRET_KEY")
|
||||||
if not app.secret_key:
|
if not app.secret_key:
|
||||||
app.logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")
|
app_logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")
|
||||||
|
|
||||||
# Configure storage directory (bind-mount is still handled by sandbox.sh)
|
# Configure storage directory (bind-mount is still handled by sandbox.sh)
|
||||||
sandbox_storage_default = Path("/data")
|
sandbox_storage_default = Path("/data")
|
||||||
app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
|
app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
|
||||||
|
|
||||||
# ---------------------------
|
|
||||||
# Suspicious Rules Engine
|
|
||||||
# ---------------------------
|
|
||||||
|
|
||||||
# Determine rules file path relative to this package (allow env override)
|
|
||||||
base_dir = Path(__file__).resolve().parent
|
|
||||||
default_rules_path = base_dir / "config" / "suspicious_rules.yaml"
|
|
||||||
rules_path_str = os.getenv("SNEAKYSCOPE_RULES_FILE", str(default_rules_path))
|
|
||||||
rules_path = Path(rules_path_str)
|
|
||||||
|
|
||||||
# Create engine bound to Flask logger so all verbose/debug goes to app.logger
|
|
||||||
engine = RuleEngine(rules=[], logger=app.logger)
|
|
||||||
|
|
||||||
# Try to load from YAML if present; log clearly if not
|
|
||||||
if rules_path.exists():
|
|
||||||
try:
|
|
||||||
loaded_rules = load_rules_from_yaml(rules_path, logger=app.logger)
|
|
||||||
# Add rules one-by-one (explicit, clearer logs if any rule fails to compile)
|
|
||||||
index = 0
|
|
||||||
total = len(loaded_rules)
|
|
||||||
while index < total:
|
|
||||||
engine.add_rule(loaded_rules[index])
|
|
||||||
index = index + 1
|
|
||||||
app.logger.info(f"[init] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
|
|
||||||
except Exception as e:
|
|
||||||
app.logger.warning(f"[init] Failed loading rules from {rules_path}: {e}")
|
|
||||||
else:
|
|
||||||
app.logger.warning(f"[init] Rules file not found at {rules_path}. Engine will start with zero rules.")
|
|
||||||
|
|
||||||
# Built-in function-based rules
|
|
||||||
adapter = FactAdapter(logger=app.logger)
|
|
||||||
|
|
||||||
engine.add_rule(Rule(
|
|
||||||
name="form_action_missing",
|
|
||||||
description="Form has no action attribute",
|
|
||||||
category="form",
|
|
||||||
rule_type="function",
|
|
||||||
function=FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter),
|
|
||||||
))
|
|
||||||
|
|
||||||
engine.add_rule(Rule(
|
|
||||||
name="form_http_on_https_page",
|
|
||||||
description="Form submits via HTTP from HTTPS page",
|
|
||||||
category="form",
|
|
||||||
rule_type="function",
|
|
||||||
function=FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter),
|
|
||||||
))
|
|
||||||
|
|
||||||
engine.add_rule(Rule(
|
|
||||||
name="form_submits_to_different_host",
|
|
||||||
description="Form submits to a different host",
|
|
||||||
category="form",
|
|
||||||
rule_type="function",
|
|
||||||
function=FunctionRuleAdapter(form_submits_to_different_host, category="form", adapter=adapter),
|
|
||||||
))
|
|
||||||
|
|
||||||
# Script rules expect dict 'facts' (you’ll wire per-script facts later)
|
|
||||||
engine.add_rule(Rule(
|
|
||||||
name="script_src_uses_data_or_blob",
|
|
||||||
description="Script src uses data:/blob: URL",
|
|
||||||
category="script",
|
|
||||||
rule_type="function",
|
|
||||||
function=FunctionRuleAdapter(script_src_uses_data_or_blob, category="script", adapter=adapter),
|
|
||||||
))
|
|
||||||
|
|
||||||
engine.add_rule(Rule(
|
|
||||||
name="script_src_has_dangerous_extension",
|
|
||||||
description="External script with dangerous extension",
|
|
||||||
category="script",
|
|
||||||
rule_type="function",
|
|
||||||
function=FunctionRuleAdapter(script_src_has_dangerous_extension, category="script", adapter=adapter),
|
|
||||||
))
|
|
||||||
|
|
||||||
engine.add_rule(Rule(
|
|
||||||
name="script_third_party_host",
|
|
||||||
description="Script is from a third-party host",
|
|
||||||
category="script",
|
|
||||||
rule_type="function",
|
|
||||||
function=FunctionRuleAdapter(script_third_party_host, category="script", adapter=adapter),
|
|
||||||
))
|
|
||||||
|
|
||||||
# Store engine both ways: attribute (convenient) and config
|
|
||||||
app.rule_engine = engine
|
|
||||||
app.config["RULE_ENGINE"] = engine
|
|
||||||
|
|
||||||
# App metadata available to templates
|
# App metadata available to templates
|
||||||
app.config["APP_NAME"] = settings.app.name
|
app.config["APP_NAME"] = settings.app.name
|
||||||
app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"
|
app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"
|
||||||
@@ -140,9 +45,10 @@ def create_app() -> Flask:
|
|||||||
app.register_blueprint(ui.bp)
|
app.register_blueprint(ui.bp)
|
||||||
app.register_blueprint(api.api_bp)
|
app.register_blueprint(api.api_bp)
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
# Example log lines so we know we booted cleanly
|
# Example log lines so we know we booted cleanly
|
||||||
app.logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
|
app_logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
|
||||||
app.logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")
|
app_logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")
|
||||||
app.logger.info(f"Registered {len(engine.rules)} total rules (YAML + function)")
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|||||||
@@ -30,12 +30,14 @@ import time
|
|||||||
from flask import Blueprint, request, jsonify, current_app, send_file, abort
|
from flask import Blueprint, request, jsonify, current_app, send_file, abort
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.logging_setup import get_app_logger
|
||||||
from app.utils.settings import get_settings
|
from app.utils.settings import get_settings
|
||||||
from app.utils.external_fetcher import ExternalScriptFetcher
|
from app.utils.external_fetcher import ExternalScriptFetcher
|
||||||
from werkzeug.exceptions import HTTPException
|
from werkzeug.exceptions import HTTPException
|
||||||
|
|
||||||
api_bp = Blueprint("api", __name__, url_prefix="/api")
|
api_bp = Blueprint("api", __name__, url_prefix="/api")
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
def _resolve_results_path(job_id: str) -> str:
|
def _resolve_results_path(job_id: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -83,7 +85,7 @@ def _api_500(err):
|
|||||||
Return JSON for server errors and log the exception.
|
Return JSON for server errors and log the exception.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
current_app.logger.exception("API 500")
|
app_logger.exception("API 500")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return jsonify({"ok": False, "error": "internal server error"}), 500
|
return jsonify({"ok": False, "error": "internal server error"}), 500
|
||||||
@@ -107,7 +109,7 @@ def analyze_script():
|
|||||||
script_url = (script_url_raw or "").strip() if isinstance(script_url_raw, str) else ""
|
script_url = (script_url_raw or "").strip() if isinstance(script_url_raw, str) else ""
|
||||||
|
|
||||||
# log this request
|
# log this request
|
||||||
current_app.logger.info(f"Got request to analyze {script_url} via API ")
|
app_logger.info(f"Got request to analyze {script_url} via API ")
|
||||||
|
|
||||||
if not job_id or not script_url:
|
if not job_id or not script_url:
|
||||||
return jsonify({"ok": False, "error": "Missing job_id (or uuid) or url"}), 400
|
return jsonify({"ok": False, "error": "Missing job_id (or uuid) or url"}), 400
|
||||||
@@ -174,7 +176,7 @@ def analyze_script():
|
|||||||
findings = matched
|
findings = matched
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
try:
|
try:
|
||||||
current_app.logger.error("Rule engine error", extra={"error": str(exc)})
|
app_logger.error("Rule engine error", extra={"error": str(exc)})
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
findings = []
|
findings = []
|
||||||
|
|||||||
@@ -7,10 +7,14 @@ from pathlib import Path
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
|
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
|
||||||
|
|
||||||
|
from app.utils.url_tools import get_url_normalizer
|
||||||
from app.utils.browser import get_browser
|
from app.utils.browser import get_browser
|
||||||
from app.utils.enrichment import enrich_url
|
from app.utils.enrichment import enrich_url
|
||||||
from app.utils.settings import get_settings
|
from app.utils.settings import get_settings
|
||||||
from app.utils.io_helpers import get_recent_results
|
from app.utils.io_helpers import get_recent_results
|
||||||
|
from app.logging_setup import get_app_logger
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
bp = Blueprint("main", __name__)
|
bp = Blueprint("main", __name__)
|
||||||
|
|
||||||
@@ -18,6 +22,47 @@ settings = get_settings()
|
|||||||
app_name = settings.app.name
|
app_name = settings.app.name
|
||||||
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
|
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
|
||||||
|
|
||||||
|
|
||||||
|
# --- data cleaner for tls to ensure data is standardized
|
||||||
|
def normalize_ssl_tls_for_view(ssl_tls):
    """
    Normalize/guard the ssl_tls structure for template rendering.

    Adds missing keys so Jinja doesn't need defensive checks everywhere.

    Args:
        ssl_tls: The raw ``ssl_tls`` value loaded from a results file.
            Anything that is not a dict is rejected.

    Returns:
        A new dict that always contains the keys ``crtsh``, ``probe``,
        ``error``, ``skipped`` and ``reason`` (plus any extra keys present in
        ``ssl_tls``).

        - Non-dict input: defaults with ``error`` set to
          ``"ssl_tls is not a dict"``.
        - ``skipped is True``: the merged dict is returned as-is; ``probe`` is
          not forced into shape.
        - Otherwise ``probe`` is a dict guaranteed to hold
          ``results_by_version`` (dict), ``weak_protocols``, ``weak_ciphers``,
          ``errors`` (lists), ``hostname`` (default ``None``) and ``port``
          (default ``443``).
    """
    safe = {"crtsh": None, "probe": None, "error": None, "skipped": False, "reason": None}

    if not isinstance(ssl_tls, dict):
        safe["error"] = "ssl_tls is not a dict"
        return safe

    safe.update(ssl_tls)

    if safe.get("skipped") is True:
        return safe  # don't force probe/crtsh keys when skipped

    # Probe guards.
    # Work on a shallow copy so the caller's nested dict is never mutated, and
    # treat any non-dict probe (None, list, str, ...) as "no probe data" instead
    # of failing on item assignment further down.
    raw_probe = safe.get("probe")
    probe = dict(raw_probe) if isinstance(raw_probe, dict) else {}

    # Container fields: replace when missing or of the wrong type.
    container_fields = (
        ("results_by_version", dict),
        ("weak_protocols", list),
        ("weak_ciphers", list),
        ("errors", list),
    )
    for key, expected_type in container_fields:
        if not isinstance(probe.get(key), expected_type):
            probe[key] = expected_type()

    # Scalar fields: only filled in when the key is absent.
    probe.setdefault("hostname", None)
    probe.setdefault("port", 443)

    safe["probe"] = probe

    # "crtsh" is kept raw; it is always present because it is seeded in the
    # defaults above, so no extra guard is needed here.
    return safe
|
||||||
|
|
||||||
|
|
||||||
# --- context processor ---
|
# --- context processor ---
|
||||||
@bp.context_processor
|
@bp.context_processor
|
||||||
def inject_app_info():
|
def inject_app_info():
|
||||||
@@ -48,7 +93,7 @@ def index():
|
|||||||
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
|
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
|
||||||
|
|
||||||
# Build the recent list (non-fatal if storage is empty or unreadable)
|
# Build the recent list (non-fatal if storage is empty or unreadable)
|
||||||
recent_results = get_recent_results(storage, recent_count, current_app.logger)
|
recent_results = get_recent_results(storage, recent_count, app_logger)
|
||||||
|
|
||||||
# Pass to template; your index.html will hide the card if list is empty
|
# Pass to template; your index.html will hide the card if list is empty
|
||||||
return render_template("index.html", recent_results=recent_results)
|
return render_template("index.html", recent_results=recent_results)
|
||||||
@@ -56,8 +101,23 @@ def index():
|
|||||||
@bp.route("/analyze", methods=["POST"])
|
@bp.route("/analyze", methods=["POST"])
|
||||||
def analyze():
|
def analyze():
|
||||||
url = request.form.get("url", "").strip()
|
url = request.form.get("url", "").strip()
|
||||||
current_app.logger.info(f"[*] Analyzing {url}")
|
|
||||||
if not url:
|
# Checkbox comes as '1' when checked, or None when not present
|
||||||
|
fetch_ssl = request.form.get("fetch_ssl")
|
||||||
|
fetch_ssl_enabled = bool(fetch_ssl == "1")
|
||||||
|
|
||||||
|
normalizer = get_url_normalizer()
|
||||||
|
|
||||||
|
try:
|
||||||
|
target = normalizer.normalize_for_analysis(url)
|
||||||
|
except ValueError:
|
||||||
|
app_logger.warning("Empty or invalid URL input")
|
||||||
|
return redirect(url_for("index"))
|
||||||
|
|
||||||
|
app_logger.info(f"[*] Analyzing URL{target}")
|
||||||
|
app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")
|
||||||
|
|
||||||
|
if not target:
|
||||||
flash("Please enter a URL.", "error")
|
flash("Please enter a URL.", "error")
|
||||||
return redirect(url_for("main.index"))
|
return redirect(url_for("main.index"))
|
||||||
|
|
||||||
@@ -66,44 +126,57 @@ def analyze():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
browser = get_browser()
|
browser = get_browser()
|
||||||
result = asyncio.run(browser.fetch_page_artifacts(url))
|
result = asyncio.run(browser.fetch_page_artifacts(url,fetch_ssl_enabled=fetch_ssl_enabled))
|
||||||
current_app.logger.info(f"[+] Analysis done for {url}")
|
app_logger.info(f"[+] Analysis done for {url}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
flash(f"Analysis failed: {e}", "error")
|
flash(f"Analysis failed: {e}", "error")
|
||||||
current_app.logger.error(f"Analysis failed for {url}: {e}")
|
app_logger.error(f"Analysis failed for {url}: {e}")
|
||||||
return redirect(url_for("main.index"))
|
return redirect(url_for("main.index"))
|
||||||
|
|
||||||
# Add enrichment safely
|
# Add enrichment safely
|
||||||
try:
|
try:
|
||||||
enrichment = enrich_url(url)
|
enrichment = enrich_url(url)
|
||||||
result["enrichment"] = enrichment
|
result["enrichment"] = enrichment
|
||||||
current_app.logger.info(f"[+] Enrichment added for {url}")
|
app_logger.info(f"[+] Enrichment added for {url}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result["enrichment"] = {}
|
result["enrichment"] = {}
|
||||||
current_app.logger.warning(f"[!] Enrichment failed for {url}: {e}")
|
app_logger.warning(f"[!] Enrichment failed for {url}: {e}")
|
||||||
|
|
||||||
# Redirect to permalink page for this run
|
# Redirect to permalink page for this run
|
||||||
return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
|
return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
|
||||||
|
|
||||||
@bp.route("/results/<run_uuid>", methods=["GET"])
|
@bp.route("/results/<run_uuid>", methods=["GET"])
|
||||||
def view_result(run_uuid: str):
|
def view_result(run_uuid: str):
|
||||||
|
"""
|
||||||
|
View the analysis results for a given run UUID.
|
||||||
|
Loads results.json from SANDBOX_STORAGE/<uuid>,
|
||||||
|
normalizes structures for template safety, and renders the result page.
|
||||||
|
"""
|
||||||
# Resolve SANDBOX_STORAGE from app config
|
# Resolve SANDBOX_STORAGE from app config
|
||||||
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
|
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
|
||||||
run_dir = storage / run_uuid
|
run_dir = storage / run_uuid
|
||||||
results_path = run_dir / "results.json"
|
results_path = run_dir / "results.json"
|
||||||
|
|
||||||
|
# Ensure results exist
|
||||||
if not results_path.exists():
|
if not results_path.exists():
|
||||||
current_app.logger.error(f"Results not found for UUID: {run_uuid}")
|
app_logger.error(f"Results not found for UUID: {run_uuid}")
|
||||||
abort(404)
|
abort(404)
|
||||||
|
|
||||||
|
# Load the results JSON
|
||||||
with open(results_path, "r", encoding="utf-8") as f:
|
with open(results_path, "r", encoding="utf-8") as f:
|
||||||
result = json.load(f)
|
result = json.load(f)
|
||||||
|
|
||||||
# Pass the UUID to the template for artifact links
|
# Add UUID so template can build artifact links
|
||||||
result["uuid"] = run_uuid
|
result["uuid"] = run_uuid
|
||||||
|
|
||||||
|
# === Normalize SSL/TLS structure for safe rendering ===
|
||||||
|
if "ssl_tls" in result:
|
||||||
|
result["ssl_tls"] = normalize_ssl_tls_for_view(result["ssl_tls"])
|
||||||
|
|
||||||
|
# Pass the enriched result dict to the template
|
||||||
return render_template("result.html", **result)
|
return render_template("result.html", **result)
|
||||||
|
|
||||||
|
|
||||||
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
|
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
|
||||||
def artifacts(run_uuid: str, filename: str):
|
def artifacts(run_uuid: str, filename: str):
|
||||||
# Resolve SANDBOX_STORAGE from app config
|
# Resolve SANDBOX_STORAGE from app config
|
||||||
@@ -115,11 +188,11 @@ def artifacts(run_uuid: str, filename: str):
|
|||||||
try:
|
try:
|
||||||
full_path.relative_to(run_dir.resolve())
|
full_path.relative_to(run_dir.resolve())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
current_app.logger.warning(f"Directory traversal attempt: {filename}")
|
app_logger.warning(f"Directory traversal attempt: {filename}")
|
||||||
abort(404)
|
abort(404)
|
||||||
|
|
||||||
if not full_path.exists():
|
if not full_path.exists():
|
||||||
current_app.logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
|
app_logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
|
||||||
abort(404)
|
abort(404)
|
||||||
|
|
||||||
return send_file(full_path)
|
return send_file(full_path)
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ app:
|
|||||||
name: SneakyScope
|
name: SneakyScope
|
||||||
version_major: 0
|
version_major: 0
|
||||||
version_minor: 1
|
version_minor: 1
|
||||||
|
print_rule_loads: True
|
||||||
|
|
||||||
cache:
|
cache:
|
||||||
recent_runs_count: 10
|
recent_runs_count: 10
|
||||||
|
|||||||
61
app/logging_setup.py
Normal file
61
app/logging_setup.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# app/logging_setup.py
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
_LOGGING_WIRED = False # module-level guard
|
||||||
|
|
||||||
|
def _clear(logger: logging.Logger) -> None:
    """Detach every handler currently attached to ``logger``."""
    # Always remove the first remaining handler until none are left.
    while logger.handlers:
        logger.removeHandler(logger.handlers[0])
|
||||||
|
|
||||||
|
def wire_logging_once(app) -> None:
    """
    Attach one shared stdout handler to the app logger and the project loggers.

    Only the first call in a process does any work: it sets the module-level
    ``_LOGGING_WIRED`` flag, and every later call returns immediately.

    Existing handlers on ``app.logger``, ``sneakyscope.app`` and
    ``sneakyscope.engine`` are removed. A single ``StreamHandler`` writing to
    ``sys.stdout`` is then attached to all three. Each logger is set to INFO
    and has propagation disabled, so records are not also passed on to
    ancestor loggers' handlers.

    Gunicorn's handlers are not reused; the shared stdout handler is the only
    handler attached here.

    Args:
        app: Object exposing a ``logger`` attribute (the Flask application).
    """
    global _LOGGING_WIRED
    if _LOGGING_WIRED:
        return
    _LOGGING_WIRED = True

    targets = [
        app.logger,
        logging.getLogger("sneakyscope.engine"),
        logging.getLogger("sneakyscope.app"),
    ]

    # Start from a clean slate (this also drops Flask's default handler).
    for target in targets:
        _clear(target)

    # The format shows pid and logger name to tell workers and engine/app apart.
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter(
        "[%(asctime)s] pid=%(process)d %(levelname)-8s %(name)s %(module)-18s %(message)s (line %(lineno)s)"
    ))
    handler.setLevel(logging.INFO)

    # Attach to app + project loggers; stop propagation everywhere.
    for target in targets:
        target.addHandler(handler)
        target.setLevel(logging.INFO)
        target.propagate = False
|
||||||
|
|
||||||
|
|
||||||
|
def get_engine_logger() -> logging.Logger:
    """
    Return the ``sneakyscope.engine`` logger with propagation disabled.

    The level is set to INFO only when no level has been set on the logger
    yet; an explicitly configured level is left alone.
    """
    engine_log = logging.getLogger("sneakyscope.engine")
    if engine_log.level == logging.NOTSET:
        engine_log.setLevel(logging.INFO)
    engine_log.propagate = False
    return engine_log
|
||||||
|
|
||||||
|
|
||||||
|
def get_app_logger() -> logging.Logger:
    """
    Return the ``sneakyscope.app`` logger with propagation disabled.

    The level is set to INFO only when no level has been set on the logger
    yet; an explicitly configured level is left alone.
    """
    app_log = logging.getLogger("sneakyscope.app")
    if app_log.level == logging.NOTSET:
        app_log.setLevel(logging.INFO)
    app_log.propagate = False
    return app_log
|
||||||
51
app/rules/factory.py
Normal file
51
app/rules/factory.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# app/rules/factory.py
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.logging_setup import get_engine_logger
|
||||||
|
from app.rules.rules_engine import RuleEngine
|
||||||
|
from app.rules.rules_engine import Rule
|
||||||
|
from app.rules.function_rules import FunctionRuleAdapter
|
||||||
|
from app.rules.function_rules import (
|
||||||
|
form_action_missing, form_http_on_https_page, form_submits_to_different_host,
|
||||||
|
script_src_uses_data_or_blob, script_src_has_dangerous_extension, script_third_party_host,
|
||||||
|
)
|
||||||
|
|
||||||
|
from app.rules.rules_engine import load_rules_from_yaml
|
||||||
|
|
||||||
|
base_dir = Path(__file__).resolve().parent.parent
|
||||||
|
RULES_FILE_PATH = base_dir / "config" / "suspicious_rules.yaml"
|
||||||
|
|
||||||
|
log = get_engine_logger()
|
||||||
|
|
||||||
|
def build_rules_engine() -> RuleEngine:
    """
    Build a fresh ``RuleEngine`` holding the YAML rules plus the built-in function rules.

    YAML rules are loaded from ``RULES_FILE_PATH`` and registered first, then
    the six form/script function rules are registered in a fixed order, all
    sharing one ``FactAdapter``. Rule counts are logged to the engine logger.

    Returns:
        The populated ``RuleEngine`` instance.
    """
    engine = RuleEngine()

    # 1) YAML rules
    loaded = load_rules_from_yaml(RULES_FILE_PATH)
    for yaml_rule in loaded:
        engine.add_rule(yaml_rule)
    log.info("Found %d suspicious rules from %s",
             len(loaded), getattr(loaded, "source_path", "config"))

    # 2) Function rules
    from app.rules.function_rules import FactAdapter
    shared_adapter = FactAdapter()

    # (rule name, description, category, rule function) in registration order.
    builtin_rules = (
        ("form_action_missing", "Form has no action attribute", "form", form_action_missing),
        ("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", form_http_on_https_page),
        ("form_submits_to_different_host", "Form submits to a different host", "form", form_submits_to_different_host),
        ("script_src_uses_data_or_blob", "Script src uses data:/blob: URL", "script", script_src_uses_data_or_blob),
        ("script_src_has_dangerous_extension", "External script with dangerous extension", "script", script_src_has_dangerous_extension),
        ("script_third_party_host", "Script is from a third-party host", "script", script_third_party_host),
    )
    for rule_name, description, category, rule_fn in builtin_rules:
        wrapped = FunctionRuleAdapter(rule_fn, category=category, adapter=shared_adapter, rule_name=rule_name)
        engine.add_rule(Rule(rule_name, description, category, "function", wrapped))

    log.info("Registered %d total rules (YAML + function)", len(engine.rules))
    return engine
|
||||||
@@ -19,10 +19,16 @@ Note:
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Callable, Dict, Optional, Tuple
|
||||||
|
import inspect
|
||||||
|
import logging
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:void(0);"}
|
from app.logging_setup import get_app_logger
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
|
_NOOP_ACTIONS = {"", "#", "javascript:void(0)", "javascript:", "about:blank"}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Adapters
|
# Adapters
|
||||||
@@ -36,9 +42,6 @@ class FactAdapter:
|
|||||||
You can expand the per-category parsers over time as needed.
|
You can expand the per-category parsers over time as needed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, logger: Optional[Any] = None) -> None:
|
|
||||||
self.logger = logger
|
|
||||||
|
|
||||||
def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
|
def adapt(self, text_or_facts: Any, category: str = "") -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Adapt text_or_facts (str or dict) into a facts dict.
|
Adapt text_or_facts (str or dict) into a facts dict.
|
||||||
@@ -65,13 +68,11 @@ class FactAdapter:
|
|||||||
elif category == "text":
|
elif category == "text":
|
||||||
return {"category": "text", "raw": text_or_facts}
|
return {"category": "text", "raw": text_or_facts}
|
||||||
else:
|
else:
|
||||||
if self.logger:
|
app_logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
|
||||||
self.logger.warning(f"[FactAdapter] Unknown category '{category}', returning raw snippet.")
|
|
||||||
return {"category": category, "raw": text_or_facts}
|
return {"category": category, "raw": text_or_facts}
|
||||||
|
|
||||||
# Fallback for unrecognized input types
|
# Fallback for unrecognized input types
|
||||||
if self.logger:
|
app_logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
|
||||||
self.logger.warning(f"[FactAdapter] Unsupported input type: {type(text_or_facts)!r}")
|
|
||||||
return {"category": category, "raw": text_or_facts}
|
return {"category": category, "raw": text_or_facts}
|
||||||
|
|
||||||
# ---- Per-category parsers ----
|
# ---- Per-category parsers ----
|
||||||
@@ -109,23 +110,149 @@ class FactAdapter:
|
|||||||
|
|
||||||
class FunctionRuleAdapter:
|
class FunctionRuleAdapter:
|
||||||
"""
|
"""
|
||||||
Callable wrapper that adapts engine input (str or dict) into 'facts' and then
|
Wraps a function-based rule so it ALWAYS returns:
|
||||||
invokes the underlying function rule that expects a facts dict.
|
- match: (True, Dict[str, Any])
|
||||||
|
- no match: (False, None)
|
||||||
|
|
||||||
Usage:
|
Also adapts non-dict inputs into facts via a provided 'adapter' using a
|
||||||
wrapped = FunctionRuleAdapter(fn=form_action_missing, category="form", adapter=FactAdapter(app.logger))
|
duck-typed protocol, so callers can pass raw items (e.g., strings/nodes).
|
||||||
matched, reason = wrapped("action=https://...") # engine-friendly
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, fn, category: str = "", adapter: Optional[FactAdapter] = None) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
fn: Callable[[Dict[str, Any]], Any],
|
||||||
|
category: str,
|
||||||
|
adapter: Optional[Any] = None,
|
||||||
|
rule_name: Optional[str] = None,
|
||||||
|
logger: Optional[logging.Logger] = None,
|
||||||
|
):
|
||||||
self.fn = fn
|
self.fn = fn
|
||||||
self.category = category
|
self.category = category
|
||||||
self.adapter = adapter or FactAdapter()
|
self.adapter = adapter
|
||||||
|
self.rule_name = rule_name or getattr(fn, "__name__", "<anonymous>")
|
||||||
|
|
||||||
def __call__(self, text_or_facts: Any):
|
|
||||||
facts = self.adapter.adapt(text_or_facts, category=self.category)
|
|
||||||
return self.fn(facts)
|
|
||||||
|
|
||||||
|
# ---------- helpers ----------
|
||||||
|
|
||||||
|
def _adapt_to_facts(self, raw: Any) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Convert whatever the engine passed into a facts dict.
|
||||||
|
Tries the provided adapter using a duck-typed protocol.
|
||||||
|
Returns a dict, or None if we can't adapt.
|
||||||
|
"""
|
||||||
|
# Already a dict? Use it.
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
return raw
|
||||||
|
|
||||||
|
# Try adapter if provided
|
||||||
|
if self.adapter is not None:
|
||||||
|
# Preferred generic signatures
|
||||||
|
for meth in ("build_facts", "facts", "to_facts"):
|
||||||
|
fn = getattr(self.adapter, meth, None)
|
||||||
|
if callable(fn):
|
||||||
|
try:
|
||||||
|
facts = fn(self.category, raw)
|
||||||
|
if isinstance(facts, dict):
|
||||||
|
return facts
|
||||||
|
except Exception as exc:
|
||||||
|
app_logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc)
|
||||||
|
|
||||||
|
# Category-specific fallbacks: build_<category>_facts / <category>_facts
|
||||||
|
cands = (f"build_{self.category}_facts", f"{self.category}_facts")
|
||||||
|
for meth in cands:
|
||||||
|
fn = getattr(self.adapter, meth, None)
|
||||||
|
if callable(fn):
|
||||||
|
try:
|
||||||
|
facts = fn(raw)
|
||||||
|
if isinstance(facts, dict):
|
||||||
|
return facts
|
||||||
|
except Exception as exc:
|
||||||
|
app_logger.exception("[Rule] '%s' adapter.%s failed: %s", self.rule_name, meth, exc)
|
||||||
|
|
||||||
|
# No way to adapt
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _coerce_return(self, outcome: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Normalize rule function returns:
|
||||||
|
|
||||||
|
accepted:
|
||||||
|
(bool, dict|None)
|
||||||
|
(bool, str) -> dict {'note': str} on match
|
||||||
|
(bool,) or bool -> (bool, None)
|
||||||
|
|
||||||
|
On invalid shapes, treat as no-match.
|
||||||
|
"""
|
||||||
|
# Exact 2-tuple
|
||||||
|
if isinstance(outcome, tuple) and len(outcome) == 2:
|
||||||
|
matched = bool(outcome[0])
|
||||||
|
raw = outcome[1]
|
||||||
|
|
||||||
|
if not matched:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
if raw is None:
|
||||||
|
return True, {} # match with empty payload is fine
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
return True, raw
|
||||||
|
if isinstance(raw, str):
|
||||||
|
return True, {"note": raw}
|
||||||
|
|
||||||
|
app_logger.warning("[Rule] '%s' returned payload of invalid type: %s",
|
||||||
|
self.rule_name, type(raw).__name__)
|
||||||
|
# Still treat as match but give minimal payload
|
||||||
|
return True, {"note": "coerced-invalid-payload", "value_repr": repr(raw)}
|
||||||
|
|
||||||
|
# Legacy: (bool,) or bare bool
|
||||||
|
if isinstance(outcome, tuple) and len(outcome) == 1 and isinstance(outcome[0], bool):
|
||||||
|
return (True, {}) if outcome[0] else (False, None)
|
||||||
|
if isinstance(outcome, bool):
|
||||||
|
return (True, {}) if outcome else (False, None)
|
||||||
|
|
||||||
|
# Junk -> no match
|
||||||
|
app_logger.warning("[Rule] '%s' returned invalid shape: %s",
|
||||||
|
self.rule_name, type(outcome).__name__)
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
# ---------- callable ----------
|
||||||
|
|
||||||
|
def __call__(self, raw: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Apply the wrapped rule to the provided item (raw or facts).
|
||||||
|
Returns:
|
||||||
|
(True, dict) on match
|
||||||
|
(False, None) on no match
|
||||||
|
"""
|
||||||
|
facts = self._adapt_to_facts(raw)
|
||||||
|
if facts is None:
|
||||||
|
app_logger.warning("[Rule] '%s' received non-dict facts (%s). Coercing to miss.",
|
||||||
|
self.rule_name, type(raw).__name__)
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
try:
|
||||||
|
outcome = self.fn(facts)
|
||||||
|
except Exception as exc:
|
||||||
|
app_logger.exception("[Rule] '%s' raised: %s", self.rule_name, exc)
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
matched, payload = self._coerce_return(outcome)
|
||||||
|
return matched, payload
|
||||||
|
|
||||||
|
|
||||||
|
def _hit(payload: Optional[Dict[str, Any]] = None) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Standardize a positive match result: (True, dict)
|
||||||
|
"""
|
||||||
|
if payload is None:
|
||||||
|
payload = {}
|
||||||
|
return True, payload
|
||||||
|
|
||||||
|
|
||||||
|
def _miss() -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Standardize a negative match result: (False, None)
|
||||||
|
"""
|
||||||
|
return False, None
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Function-based rules (dict 'facts' expected)
|
# Function-based rules (dict 'facts' expected)
|
||||||
@@ -133,20 +260,25 @@ class FunctionRuleAdapter:
|
|||||||
|
|
||||||
# ---------------- Script rules ----------------
|
# ---------------- Script rules ----------------
|
||||||
|
|
||||||
def script_src_uses_data_or_blob(facts: Dict[str, Any]):
|
def script_src_uses_data_or_blob(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
"""Flags <script> tags with src='data:' or 'blob:'."""
|
"""Flags <script> tags with src='data:' or 'blob:'."""
|
||||||
src = facts.get("src") or ""
|
src = facts.get("src") or ""
|
||||||
if isinstance(src, str) and src.startswith(("data:", "blob:")):
|
if isinstance(src, str) and src.startswith(("data:", "blob:")):
|
||||||
scheme = src.split(":", 1)[0]
|
scheme = src.split(":", 1)[0]
|
||||||
return True, f"Script src uses {scheme}: URL"
|
return _hit({
|
||||||
return False, None
|
"scheme": scheme,
|
||||||
|
"src": src,
|
||||||
|
"note": f"Script src uses {scheme}: URL"
|
||||||
|
})
|
||||||
|
return _miss()
|
||||||
|
|
||||||
|
|
||||||
def script_src_has_dangerous_extension(facts: Dict[str, Any]):
|
def script_src_has_dangerous_extension(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
"""Flags <script> tags with dangerous file extensions (e.g., .vbs, .hta)."""
|
"""Flags <script> tags with dangerous file extensions (e.g., .vbs, .hta)."""
|
||||||
src = facts.get("src") or ""
|
src = facts.get("src") or ""
|
||||||
if not isinstance(src, str):
|
if not isinstance(src, str):
|
||||||
return False, None
|
return _miss()
|
||||||
|
|
||||||
low = src.lower()
|
low = src.lower()
|
||||||
dangerous = (".vbs", ".hta")
|
dangerous = (".vbs", ".hta")
|
||||||
i = 0
|
i = 0
|
||||||
@@ -154,31 +286,43 @@ def script_src_has_dangerous_extension(facts: Dict[str, Any]):
|
|||||||
while i < m:
|
while i < m:
|
||||||
ext = dangerous[i]
|
ext = dangerous[i]
|
||||||
if low.endswith(ext):
|
if low.endswith(ext):
|
||||||
return True, f"External script has dangerous extension ({ext})"
|
return _hit({
|
||||||
|
"ext": ext,
|
||||||
|
"src": src,
|
||||||
|
"note": f"External script has dangerous extension ({ext})"
|
||||||
|
})
|
||||||
i = i + 1
|
i = i + 1
|
||||||
return False, None
|
|
||||||
|
return _miss()
|
||||||
|
|
||||||
|
|
||||||
def script_third_party_host(facts: Dict[str, Any]):
|
def script_third_party_host(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
"""Flags scripts loaded from a different hostname than the page."""
|
"""Flags scripts loaded from a different hostname than the page."""
|
||||||
base_host = facts.get("base_hostname") or ""
|
base_host = facts.get("base_hostname") or ""
|
||||||
src_host = facts.get("src_hostname") or ""
|
src_host = facts.get("src_hostname") or ""
|
||||||
if base_host and src_host and base_host != src_host:
|
if base_host and src_host and base_host != src_host:
|
||||||
return True, f"Third-party script host: {src_host}"
|
return _hit({
|
||||||
return False, None
|
"base_host": base_host,
|
||||||
|
"src_host": src_host,
|
||||||
|
"note": f"Third-party script host: {src_host}"
|
||||||
|
})
|
||||||
|
return _miss()
|
||||||
|
|
||||||
|
|
||||||
# ---------------- Form rules ----------------
|
# ---------------- Form rules ----------------
|
||||||
|
|
||||||
def form_action_missing(facts: Dict[str, Any]):
|
def form_action_missing(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
"""Flags <form> elements with no meaningful action attribute."""
|
"""Flags <form> elements with no meaningful action attribute."""
|
||||||
action = (facts.get("action") or "").strip()
|
action = (facts.get("action") or "").strip()
|
||||||
if action in _NOOP_ACTIONS:
|
if action in _NOOP_ACTIONS:
|
||||||
return True, "Form has no action attribute (or uses a no-op action)"
|
return _hit({
|
||||||
return False, None
|
"action": action,
|
||||||
|
"note": "Form has no action attribute (or uses a no-op action)"
|
||||||
|
})
|
||||||
|
return _miss()
|
||||||
|
|
||||||
|
|
||||||
def form_http_on_https_page(facts: Dict[str, Any]):
|
def form_http_on_https_page(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
"""Flags forms submitting over HTTP while the page was loaded over HTTPS."""
|
"""Flags forms submitting over HTTP while the page was loaded over HTTPS."""
|
||||||
base_url = (facts.get("base_url") or "").strip()
|
base_url = (facts.get("base_url") or "").strip()
|
||||||
action = (facts.get("action") or "").strip()
|
action = (facts.get("action") or "").strip()
|
||||||
@@ -188,30 +332,38 @@ def form_http_on_https_page(facts: Dict[str, Any]):
|
|||||||
parsed_act = urlparse(action)
|
parsed_act = urlparse(action)
|
||||||
act_scheme = (parsed_act.scheme or "").lower()
|
act_scheme = (parsed_act.scheme or "").lower()
|
||||||
except Exception:
|
except Exception:
|
||||||
return False, None # parsing trouble → don’t flag
|
return _miss() # parsing trouble → don’t flag
|
||||||
|
|
||||||
# Only flag absolute http:// actions on https pages.
|
# Only flag absolute http:// actions on https pages.
|
||||||
# Relative or schemeless ('//host/...') isn’t flagged here (it won’t be HTTP on an HTTPS page).
|
|
||||||
if base_scheme == "https" and act_scheme == "http":
|
if base_scheme == "https" and act_scheme == "http":
|
||||||
return True, f"Submits over insecure HTTP (action={parsed_act.geturl()})"
|
return _hit({
|
||||||
return False, None
|
"base_url": base_url,
|
||||||
|
"action": parsed_act.geturl(),
|
||||||
|
"note": "Submits over insecure HTTP"
|
||||||
|
})
|
||||||
|
return _miss()
|
||||||
|
|
||||||
|
|
||||||
def form_submits_to_different_host(facts: Dict[str, Any]):
|
def form_submits_to_different_host(facts: Dict[str, Any]) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
||||||
"""Flags <form> actions that submit to a different hostname than the page."""
|
"""Flags <form> actions that submit to a different hostname than the page."""
|
||||||
base_host = (facts.get("base_hostname") or "").strip().lower()
|
base_host = (facts.get("base_hostname") or "").strip().lower()
|
||||||
action = (facts.get("action") or "").strip()
|
action = (facts.get("action") or "").strip()
|
||||||
|
|
||||||
if not action or action in _NOOP_ACTIONS:
|
if not action or action in _NOOP_ACTIONS:
|
||||||
return False, None
|
return _miss()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = urlparse(action)
|
parsed = urlparse(action)
|
||||||
act_host = (parsed.hostname or "").lower()
|
act_host = (parsed.hostname or "").lower()
|
||||||
except Exception:
|
except Exception:
|
||||||
return False, None
|
return _miss()
|
||||||
|
|
||||||
# Only compare when the action specifies a host (absolute URL or schemeless //host/path).
|
# Only compare when the action specifies a host (absolute URL or schemeless //host/path).
|
||||||
if act_host and base_host and act_host != base_host:
|
if act_host and base_host and act_host != base_host:
|
||||||
return True, f"Submits to a different host ({act_host} vs {base_host})"
|
return _hit({
|
||||||
return False, None
|
"base_host": base_host,
|
||||||
|
"act_host": act_host,
|
||||||
|
"action": action,
|
||||||
|
"note": "Submits to a different host"
|
||||||
|
})
|
||||||
|
return _miss()
|
||||||
@@ -1,9 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
rules_engine.py
|
rules_engine.py
|
||||||
|
|
||||||
Flask-logger integrated rules engine for SneakyScope.
|
|
||||||
Logs go to `current_app.logger` when a Flask app context is active,
|
|
||||||
otherwise to a namespaced standard logger "sneakyscope.rules".
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@@ -12,6 +8,11 @@ from dataclasses import dataclass, asdict, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Dict, List, Optional, Tuple, Union
|
from typing import Callable, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
from app.logging_setup import get_engine_logger
|
||||||
|
from app.utils.settings import get_settings
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -22,16 +23,7 @@ except Exception:
|
|||||||
def has_app_context() -> bool: # type: ignore
|
def has_app_context() -> bool: # type: ignore
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
logger = get_engine_logger()
|
||||||
def get_engine_logger() -> logging.Logger:
|
|
||||||
"""
|
|
||||||
Return a logger that prefers Flask's current_app.logger if available.
|
|
||||||
Falls back to a namespaced standard logger otherwise.
|
|
||||||
"""
|
|
||||||
if has_app_context() and current_app is not None and hasattr(current_app, "logger"):
|
|
||||||
return current_app.logger
|
|
||||||
return logging.getLogger("sneakyscope.rules")
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Rule:
|
class Rule:
|
||||||
@@ -53,15 +45,13 @@ class Rule:
|
|||||||
# Internal compiled regex cache (not serialized)
|
# Internal compiled regex cache (not serialized)
|
||||||
_compiled_regex: Optional[re.Pattern] = field(default=None, repr=False, compare=False)
|
_compiled_regex: Optional[re.Pattern] = field(default=None, repr=False, compare=False)
|
||||||
|
|
||||||
def compile_if_needed(self, logger: Optional[logging.Logger] = None) -> bool:
|
def compile_if_needed(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Compile the regex pattern once for performance, if applicable.
|
Compile the regex pattern once for performance, if applicable.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if the regex is compiled and ready, False otherwise.
|
bool: True if the regex is compiled and ready, False otherwise.
|
||||||
"""
|
"""
|
||||||
if logger is None:
|
|
||||||
logger = get_engine_logger()
|
|
||||||
|
|
||||||
if self.rule_type == "regex" and self.pattern:
|
if self.rule_type == "regex" and self.pattern:
|
||||||
try:
|
try:
|
||||||
@@ -74,23 +64,20 @@ class Rule:
|
|||||||
return False
|
return False
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def run(self, text: str, logger: Optional[logging.Logger] = None) -> Tuple[bool, str]:
|
def run(self, text: str) -> Tuple[bool, str]:
|
||||||
"""
|
"""
|
||||||
Run the rule on the given text.
|
Run the rule on the given text.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(matched: bool, reason: str)
|
(matched: bool, reason: str)
|
||||||
"""
|
"""
|
||||||
if logger is None:
|
|
||||||
logger = get_engine_logger()
|
|
||||||
|
|
||||||
if self.rule_type == "regex":
|
if self.rule_type == "regex":
|
||||||
if not self.pattern:
|
if not self.pattern:
|
||||||
logger.warning(f"[Rule] '{self.name}' missing regex pattern.")
|
logger.warning(f"[Rule] '{self.name}' missing regex pattern.")
|
||||||
return False, "Invalid rule configuration: missing pattern"
|
return False, "Invalid rule configuration: missing pattern"
|
||||||
|
|
||||||
if self._compiled_regex is None:
|
if self._compiled_regex is None:
|
||||||
compiled_ok = self.compile_if_needed(logger=logger)
|
compiled_ok = self.compile_if_needed()
|
||||||
if not compiled_ok:
|
if not compiled_ok:
|
||||||
return False, f"Invalid regex pattern: {self.pattern!r}"
|
return False, f"Invalid regex pattern: {self.pattern!r}"
|
||||||
|
|
||||||
@@ -137,44 +124,111 @@ class RuleEngine:
|
|||||||
Loads and executes rules against provided text, with Flask-aware logging.
|
Loads and executes rules against provided text, with Flask-aware logging.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, rules: Optional[List[Rule]] = None, logger: Optional[logging.Logger] = None):
|
def __init__(self, rules: Optional[List[Rule]] = None):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
rules: Optional initial rule list.
|
rules: Optional initial rule list.
|
||||||
logger: Optional explicit logger. If None, uses Flask app logger if available,
|
|
||||||
otherwise a namespaced standard logger.
|
|
||||||
"""
|
"""
|
||||||
if logger is None:
|
|
||||||
self.logger = get_engine_logger()
|
|
||||||
else:
|
|
||||||
self.logger = logger
|
|
||||||
|
|
||||||
self.rules: List[Rule] = rules or []
|
# IMPORTANT: back the property with a private list
|
||||||
self._compile_all()
|
self._rules = [] # was: self.rules = []
|
||||||
|
self._rule_keys = set()
|
||||||
|
self._rule_index = {}
|
||||||
|
|
||||||
def _compile_all(self) -> None:
|
# If a list of rules was provided, add them via add_rule so compilation happens
|
||||||
"""
|
if rules:
|
||||||
Compile all regex rules at initialization and warn about failures.
|
i = 0
|
||||||
"""
|
n = len(rules)
|
||||||
index = 0
|
while i < n:
|
||||||
total = len(self.rules)
|
self.add_rule(rules[i]) # compiles regex as needed
|
||||||
while index < total:
|
i = i + 1
|
||||||
rule = self.rules[index]
|
|
||||||
if rule.rule_type == "regex":
|
|
||||||
compiled_ok = rule.compile_if_needed(logger=self.logger)
|
|
||||||
if not compiled_ok:
|
|
||||||
self.logger.warning(f"[Engine] Regex failed at init for rule '{rule.name}' (pattern={rule.pattern!r})")
|
|
||||||
index = index + 1
|
|
||||||
|
|
||||||
def add_rule(self, rule: Rule) -> None:
|
def add_rule(self, rule: Rule, replace: bool = False) -> None:
|
||||||
"""
|
"""
|
||||||
Add a new rule at runtime; compiles regex if needed and logs failures.
|
Add a new rule at runtime; compiles regex if needed and logs failures.
|
||||||
|
|
||||||
|
Idempotent by (category, name):
|
||||||
|
- If the same (category, name) is already present:
|
||||||
|
* replace=False (default): ignore duplicate and warn.
|
||||||
|
* replace=True: replace the existing rule in place and recompile regex.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
rule: Rule to add.
|
||||||
|
replace: If True, overwrite an existing rule with the same (category, name).
|
||||||
"""
|
"""
|
||||||
self.rules.append(rule)
|
# Ensure tracking structures exist in case __init__ wasn’t updated somewhere
|
||||||
|
if not hasattr(self, "_rule_keys"):
|
||||||
|
self._rule_keys = set()
|
||||||
|
if not hasattr(self, "_rule_index"):
|
||||||
|
self._rule_index = {}
|
||||||
|
i = 0
|
||||||
|
length = len(getattr(self, "_rules", []))
|
||||||
|
while i < length:
|
||||||
|
existing = self._rules[i]
|
||||||
|
key_i = (existing.category, existing.name)
|
||||||
|
self._rule_keys.add(key_i)
|
||||||
|
self._rule_index[key_i] = i
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
|
key = (rule.category, rule.name)
|
||||||
|
|
||||||
|
if key in self._rule_keys:
|
||||||
|
if not replace:
|
||||||
|
try:
|
||||||
|
logger.warning("[Rules] Duplicate registration ignored: %s/%s", rule.category, rule.name)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return
|
||||||
|
|
||||||
|
# Replace existing rule in place
|
||||||
|
idx = self._rule_index.get(key)
|
||||||
|
if idx is None:
|
||||||
|
idx = len(self._rules)
|
||||||
|
self._rules.append(rule)
|
||||||
|
self._rule_index[key] = idx
|
||||||
|
else:
|
||||||
|
self._rules[idx] = rule
|
||||||
|
|
||||||
|
if rule.rule_type == "regex":
|
||||||
|
compiled_ok = rule.compile_if_needed()
|
||||||
|
if not compiled_ok:
|
||||||
|
logger.warning(
|
||||||
|
"[Engine] Regex failed when replacing rule '%s' (pattern=%r)",
|
||||||
|
rule.name, getattr(rule, "pattern", None)
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
if settings.app.print_rule_loads:
|
||||||
|
logger.info(
|
||||||
|
"[engine] add_rule: %s/%s replace=%s -> count=%d",
|
||||||
|
rule.category, rule.name, bool(replace), len(self._rules)
|
||||||
|
)
|
||||||
|
|
||||||
|
# New rule path
|
||||||
|
self._rules.append(rule)
|
||||||
|
self._rule_keys.add(key)
|
||||||
|
self._rule_index[key] = len(self._rules) - 1
|
||||||
|
|
||||||
if rule.rule_type == "regex":
|
if rule.rule_type == "regex":
|
||||||
compiled_ok = rule.compile_if_needed(logger=self.logger)
|
compiled_ok = rule.compile_if_needed()
|
||||||
if not compiled_ok:
|
if not compiled_ok:
|
||||||
self.logger.warning(f"[Engine] Regex failed when adding rule '{rule.name}' (pattern={rule.pattern!r})")
|
logger.warning(
|
||||||
|
"[Engine] Regex failed when adding rule '%s' (pattern=%r)",
|
||||||
|
rule.name, getattr(rule, "pattern", None)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# helper, not used ATM
|
||||||
|
def add_rules(self, rules: list[Rule], replace: bool = False) -> None:
|
||||||
|
"""
|
||||||
|
Add many rules safely (idempotent). Uses the same semantics as add_rule.
|
||||||
|
"""
|
||||||
|
i = 0
|
||||||
|
n = len(rules)
|
||||||
|
while i < n:
|
||||||
|
self.add_rule(rules[i], replace=replace)
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
def run_all(self, text: str, category: Optional[str] = None) -> List[Dict]:
|
def run_all(self, text: str, category: Optional[str] = None) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
@@ -198,7 +252,7 @@ class RuleEngine:
|
|||||||
index = index + 1
|
index = index + 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
matched, reason = rule.run(text, logger=self.logger)
|
matched, reason = rule.run(text)
|
||||||
|
|
||||||
result_str = "FAIL" if matched else "PASS"
|
result_str = "FAIL" if matched else "PASS"
|
||||||
reason_to_include: Optional[str]
|
reason_to_include: Optional[str]
|
||||||
@@ -220,11 +274,21 @@ class RuleEngine:
|
|||||||
results.append(asdict(rr))
|
results.append(asdict(rr))
|
||||||
index = index + 1
|
index = index + 1
|
||||||
|
|
||||||
self.logger.debug(f"[Engine] Completed evaluation. Returned {len(results)} rule results.")
|
logger.debug(f"[Engine] Completed evaluation. Returned {len(results)} rule results.")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@property
|
||||||
|
def rules(self) -> List[Rule]:
|
||||||
|
"""Read-only view (returns a shallow copy) of registered rules."""
|
||||||
|
return list(self._rules)
|
||||||
|
|
||||||
def load_rules_from_yaml(yaml_file: Union[str, Path], logger: Optional[logging.Logger] = None) -> List[Rule]:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
|
||||||
"""
|
"""
|
||||||
Load rules from a YAML file.
|
Load rules from a YAML file.
|
||||||
|
|
||||||
@@ -242,8 +306,6 @@ def load_rules_from_yaml(yaml_file: Union[str, Path], logger: Optional[logging.L
|
|||||||
Returns:
|
Returns:
|
||||||
List[Rule]
|
List[Rule]
|
||||||
"""
|
"""
|
||||||
if logger is None:
|
|
||||||
logger = get_engine_logger()
|
|
||||||
|
|
||||||
rules: List[Rule] = []
|
rules: List[Rule] = []
|
||||||
|
|
||||||
@@ -287,5 +349,4 @@ def load_rules_from_yaml(yaml_file: Union[str, Path], logger: Optional[logging.L
|
|||||||
rules.append(rule)
|
rules.append(rule)
|
||||||
idx = idx + 1
|
idx = idx + 1
|
||||||
|
|
||||||
logger.info(f"[Loader] Loaded {len(rules)} rules from '{yaml_file}'.")
|
|
||||||
return rules
|
return rules
|
||||||
17
app/state.py
Normal file
17
app/state.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# app/state.py
|
||||||
|
from typing import Optional
|
||||||
|
from app.logging_setup import get_engine_logger
|
||||||
|
|
||||||
|
_engine_singleton = None
|
||||||
|
|
||||||
|
def set_rules_engine(engine) -> None:
|
||||||
|
"""Install the process-global rules engine."""
|
||||||
|
global _engine_singleton
|
||||||
|
_engine_singleton = engine
|
||||||
|
get_engine_logger().info("[state] engine set id=%s count=%s",
|
||||||
|
hex(id(engine)),
|
||||||
|
len(getattr(engine, "rules", [])))
|
||||||
|
|
||||||
|
def get_rules_engine() -> Optional[object]:
|
||||||
|
"""Return the process-global rules engine, or None if not set yet."""
|
||||||
|
return _engine_singleton
|
||||||
@@ -350,6 +350,11 @@ details pre.code {
|
|||||||
border: 1px solid #d9e2ec;
|
border: 1px solid #d9e2ec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.checkbox-row {
|
||||||
|
display: flex; align-items: center; gap: .5rem;
|
||||||
|
margin: .5rem 0 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
/* ==========================================================================
|
/* ==========================================================================
|
||||||
11) Sticky Top Jump Navigation
|
11) Sticky Top Jump Navigation
|
||||||
-------------------------------------------------------------------------- */
|
-------------------------------------------------------------------------- */
|
||||||
@@ -395,6 +400,52 @@ details pre.code {
|
|||||||
box-shadow: inset 0 0 0 1px rgba(59,130,246,.25);
|
box-shadow: inset 0 0 0 1px rgba(59,130,246,.25);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* --- Titles and structure --- */
|
||||||
|
.card-title { margin: 0 0 .5rem; font-size: 1.1rem; }
|
||||||
|
.section { margin-top: 1rem; }
|
||||||
|
.section-header { display: flex; gap: .5rem; align-items: baseline; flex-wrap: wrap; }
|
||||||
|
|
||||||
|
/* --- Divider --- */
|
||||||
|
.divider { border: 0; border-top: 1px solid #1f2a36; margin: 1rem 0; }
|
||||||
|
|
||||||
|
/* --- Badges / Chips --- */
|
||||||
|
.badge { display: inline-block; padding: .15rem .5rem; border-radius: 999px; font-size: .75rem; border: 1px solid transparent; }
|
||||||
|
.badge-ok { background: #0e3820; border-color: #2c6e49; color: #bff3cf; }
|
||||||
|
.badge-warn { background: #3d290e; border-color: #9a6b18; color: #ffe2a8; }
|
||||||
|
.badge-danger { background: #401012; border-color: #a33a42; color: #ffc1c5; }
|
||||||
|
.badge-muted { background: #111826; border-color: #273447; color: #9fb0c3; }
|
||||||
|
|
||||||
|
.chip { display: inline-block; padding: .1rem .4rem; border: 1px solid #273447; border-radius: 8px; font-size: .75rem; margin-right: .25rem; }
|
||||||
|
.chip-warn { border-color: #9a6b18; }
|
||||||
|
|
||||||
|
/* --- Text helpers --- */
|
||||||
|
.muted { color: #9fb0c3; }
|
||||||
|
.small { font-size: .8rem; }
|
||||||
|
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; }
|
||||||
|
.prewrap { white-space: pre-wrap; }
|
||||||
|
|
||||||
|
/* --- Lists / details --- */
|
||||||
|
.list { margin: .5rem 0; padding-left: 1.1rem; }
|
||||||
|
.details summary { cursor: pointer; }
|
||||||
|
|
||||||
|
/* --- Grid --- */
|
||||||
|
.grid.two { display: grid; grid-template-columns: 1fr; gap: 1rem; }
|
||||||
|
@media (min-width: 900px) {
|
||||||
|
.grid.two { grid-template-columns: 1fr 1fr; }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* --- TLS Matrix --- */
|
||||||
|
.tls-matrix { border: 1px solid #1f2a36; border-radius: 10px; overflow: hidden; }
|
||||||
|
.tls-matrix-row { display: grid; grid-template-columns: 120px 140px 1fr 100px; gap: .5rem; align-items: center;
|
||||||
|
padding: .5rem .75rem; border-bottom: 1px solid #1f2a36; }
|
||||||
|
.tls-matrix-row:last-child { border-bottom: none; }
|
||||||
|
|
||||||
|
.tls-cell.version { font-weight: 600; }
|
||||||
|
.tls-cell.status {}
|
||||||
|
.tls-cell.cipher {}
|
||||||
|
.tls-cell.latency { text-align: right; }
|
||||||
|
|
||||||
|
|
||||||
/* ==========================================================================
|
/* ==========================================================================
|
||||||
12) Responsive Tweaks
|
12) Responsive Tweaks
|
||||||
-------------------------------------------------------------------------- */
|
-------------------------------------------------------------------------- */
|
||||||
|
|||||||
182
app/templates/_macros_ssl_tls.html
Normal file
182
app/templates/_macros_ssl_tls.html
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
{# templates/_macros_ssl_tls.html #}
|
||||||
|
{# Private helper: render up to `limit` crt.sh certificate entries as a list,
   followed by a "(+ N more)" line when the list was truncated. When `certs`
   is empty or missing, `empty_message` is shown instead. #}
{% macro _ssl_tls_cert_list(certs, empty_message, limit=10) %}
  {% if certs and certs|length > 0 %}
    <ul class="list">
      {% for c in certs[:limit] %}
        <li class="mono small">
          <span class="chip">{{ c.get('issuer_name','issuer n/a') }}</span>
          <span class="muted"> • </span>
          <strong>{{ c.get('name_value','(name n/a)') }}</strong>
          <span class="muted"> • not_before:</span> {{ c.get('not_before','?') }}
        </li>
      {% endfor %}
    </ul>
    {% if certs|length > limit %}
      <div class="muted small">(+ {{ certs|length - limit }} more)</div>
    {% endif %}
  {% else %}
    <p class="muted">{{ empty_message }}</p>
  {% endif %}
{% endmacro %}

{# Private helper: button + hidden <pre> holding the raw ssl_tls payload as JSON.
   The button carries data-toggle="tls-raw", the id of the <pre> it refers to.
   type="button" keeps it from submitting if the card is ever placed in a form. #}
{% macro _ssl_tls_raw_toggle(ssl_tls) %}
  <div class="section">
    <button type="button" class="badge badge-muted" data-toggle="tls-raw">Toggle raw</button>
    <pre id="tls-raw" hidden>{{ ssl_tls|tojson(indent=2) }}</pre>
  </div>
{% endmacro %}

{# Render the "SSL/TLS Intelligence" card.

   ssl_tls is expected to be one of:
     - undefined / none, or a mapping containing "error"  -> error branch
     - a mapping with a truthy "skipped" (optional "reason") -> skipped branch
     - a mapping with "probe" and/or "crtsh"               -> normal branch
#}
{% macro ssl_tls_card(ssl_tls) %}
<div class="card" id="ssl">
  <h2 class="card-title">SSL/TLS Intelligence</h2>

  {# -------- 1) Error branch -------- #}
  {# "is not defined" is checked first so a missing value never reaches the
     attribute lookups in the branches below. #}
  {% if ssl_tls is not defined or ssl_tls is none or 'error' in ssl_tls %}
    <div class="badge badge-danger">Error</div>
    <p class="muted">SSL/TLS enrichment failed or is unavailable.</p>
    {% if ssl_tls and ssl_tls.error %}<pre class="prewrap">{{ ssl_tls.error }}</pre>{% endif %}

  {# -------- 2) Skipped branch -------- #}
  {% elif ssl_tls.skipped %}
    <div class="badge badge-muted">Skipped</div>
    {% if ssl_tls.reason %}<span class="muted small">{{ ssl_tls.reason }}</span>{% endif %}

    {{ _ssl_tls_raw_toggle(ssl_tls) }}

  {# -------- 3) Normal branch (render probe + crt.sh) -------- #}
  {% else %}

    {# ===================== LIVE PROBE ===================== #}
    {% set probe = ssl_tls.probe if ssl_tls else None %}
    <section class="section">
      <div class="section-header">
        <h3>Live TLS Probe</h3>
        {% if probe %}
          <span class="muted">Host:</span> <code>{{ probe.hostname }}:{{ probe.port }}</code>
        {% endif %}
      </div>

      {% if not probe %}
        <p class="muted">No probe data.</p>
      {% else %}
        <div class="tls-matrix">
          {% set versions = ['TLS1.0','TLS1.1','TLS1.2','TLS1.3'] %}
          {% for v in versions %}
            {% set r = probe.results_by_version.get(v) if probe.results_by_version else None %}
            <div class="tls-matrix-row">
              <div class="tls-cell version">{{ v }}</div>

              {% if r and r.supported %}
                <div class="tls-cell status"><span class="badge badge-ok">Supported</span></div>
                <div class="tls-cell cipher">
                  {% if r.selected_cipher %}
                    <span class="chip">{{ r.selected_cipher }}</span>
                  {% else %}
                    <span class="muted">cipher: n/a</span>
                  {% endif %}
                </div>
                <div class="tls-cell latency">
                  {# "is number" also rejects a missing key, which would
                     otherwise fail in the multiplication below. #}
                  {% if r.handshake_seconds is number %}
                    <span class="muted">{{ '%.0f' % (r.handshake_seconds*1000) }} ms</span>
                  {% else %}
                    <span class="muted">—</span>
                  {% endif %}
                </div>
              {% else %}
                <div class="tls-cell status"><span class="badge badge-muted">Not Supported</span></div>
                <div class="tls-cell cipher">
                  {% if r and r.error %}
                    <span class="muted small">({{ r.error }})</span>
                  {% else %}
                    <span class="muted">—</span>
                  {% endif %}
                </div>
                <div class="tls-cell latency"><span class="muted">—</span></div>
              {% endif %}
            </div>
          {% endfor %}
        </div>

        <div class="flag-row">
          {% if probe.weak_protocols and probe.weak_protocols|length > 0 %}
            <span class="badge badge-warn">Weak Protocols</span>
            {% for wp in probe.weak_protocols %}
              <span class="chip chip-warn">{{ wp }}</span>
            {% endfor %}
          {% endif %}
          {% if probe.weak_ciphers and probe.weak_ciphers|length > 0 %}
            <span class="badge badge-warn">Weak Ciphers</span>
            {% for wc in probe.weak_ciphers %}
              <span class="chip chip-warn">{{ wc }}</span>
            {% endfor %}
          {% endif %}
        </div>

        {% if probe.errors and probe.errors|length > 0 %}
          <details class="details">
            <summary>Probe Notes</summary>
            <ul class="list">
              {% for e in probe.errors %}
                <li class="muted small">{{ e }}</li>
              {% endfor %}
            </ul>
          </details>
        {% endif %}
      {% endif %}
    </section>

    <hr class="divider"/>

    {# ===================== CRT.SH ===================== #}
    {% set crtsh = ssl_tls.crtsh if ssl_tls else None %}
    <section class="section">
      <div class="section-header">
        <h3>Certificate Transparency (crt.sh)</h3>
        {% if crtsh %}
          <span class="muted">Parsed:</span>
          <code>{{ crtsh.hostname or 'n/a' }}</code>
          {% if crtsh.root_domain %}
            <span class="muted"> • Root:</span> <code>{{ crtsh.root_domain }}</code>
            {% if crtsh.is_root_domain %}<span class="badge badge-ok">Root</span>{% else %}<span class="badge badge-muted">Subdomain</span>{% endif %}
          {% endif %}
        {% endif %}
      </div>

      {% if not crtsh %}
        <p class="muted">No CT data.</p>
      {% else %}
        {# The certificate lists live one level down, under crtsh.crtsh. #}
        {% set cert_lists = crtsh.crtsh if 'crtsh' in crtsh and crtsh.crtsh else None %}
        <div class="grid two">
          <div>
            <h4 class="muted">Host Certificates</h4>
            {{ _ssl_tls_cert_list(cert_lists.host_certs if cert_lists else None, 'No active host certs found.') }}
          </div>

          <div>
            <h4 class="muted">Wildcard on Root</h4>
            {{ _ssl_tls_cert_list(cert_lists.wildcard_root_certs if cert_lists else None, 'No wildcard/root certs found.') }}
          </div>
        </div>
      {% endif %}
    </section>

    {# ===================== RAW JSON TOGGLE ===================== #}
    {{ _ssl_tls_raw_toggle(ssl_tls) }}

  {% endif %}

  <p><a href="#top-jump-list">Back to top</a></p>
</div>
{% endmacro %}
|
||||||
@@ -6,6 +6,13 @@
|
|||||||
<h2>Analyze a URL</h2>
|
<h2>Analyze a URL</h2>
|
||||||
<label for="url">Enter a URL to analyze</label>
|
<label for="url">Enter a URL to analyze</label>
|
||||||
<input id="url" name="url" type="url" placeholder="https://example.com" required />
|
<input id="url" name="url" type="url" placeholder="https://example.com" required />
|
||||||
|
|
||||||
|
<!-- toggle for pulling ssl/cert data -->
|
||||||
|
<label class="checkbox-row">
|
||||||
|
<input type="checkbox" name="fetch_ssl" value="1">
|
||||||
|
Pull SSL/TLS data (crt.sh + version probe) - Warning, crt.sh can be <b>very slow</b> at times
|
||||||
|
</label>
|
||||||
|
|
||||||
<button type="submit">Analyze</button>
|
<button type="submit">Analyze</button>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
{% extends "base.html" %}
|
{% extends "base.html" %}
|
||||||
|
{% from "_macros_ssl_tls.html" import ssl_tls_card %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
|
|
||||||
<!-- Top Jump List -->
|
<!-- Top Jump List -->
|
||||||
@@ -8,6 +9,7 @@
|
|||||||
<a href="/">Analyse Another Page</a>
|
<a href="/">Analyse Another Page</a>
|
||||||
<a href="#url-overview">URL Overview</a>
|
<a href="#url-overview">URL Overview</a>
|
||||||
<a href="#enrichment">Enrichment</a>
|
<a href="#enrichment">Enrichment</a>
|
||||||
|
<a href="#ssl">TLS / Certs</a>
|
||||||
<a href="#redirects">Redirects</a>
|
<a href="#redirects">Redirects</a>
|
||||||
<a href="#forms">Forms</a>
|
<a href="#forms">Forms</a>
|
||||||
<a href="#scripts">Suspicious Scripts</a>
|
<a href="#scripts">Suspicious Scripts</a>
|
||||||
@@ -86,6 +88,9 @@
|
|||||||
<p><a href="#top-jump-list">Back to top</a></p>
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- TLS / SSL / CERTS -->
|
||||||
|
{{ ssl_tls_card(enrichment.ssl_tls) }}
|
||||||
|
|
||||||
<!-- Redirects -->
|
<!-- Redirects -->
|
||||||
<div class="card" id="redirects">
|
<div class="card" id="redirects">
|
||||||
<h2>Redirects</h2>
|
<h2>Redirects</h2>
|
||||||
@@ -466,5 +471,19 @@ document.addEventListener('click', function (e) {
|
|||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.addEventListener('click', function (e) {
|
||||||
|
if (e.target.matches('[data-toggle]')) {
|
||||||
|
var id = e.target.getAttribute('data-toggle');
|
||||||
|
var el = document.getElementById(id);
|
||||||
|
if (el) {
|
||||||
|
var hidden = el.getAttribute('hidden') !== null;
|
||||||
|
if (hidden) { el.removeAttribute('hidden'); } else { el.setAttribute('hidden', ''); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, true);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
@@ -35,10 +35,12 @@ from playwright.async_api import async_playwright, TimeoutError as PWTimeoutErro
|
|||||||
from app.utils.io_helpers import safe_write
|
from app.utils.io_helpers import safe_write
|
||||||
from app.utils.enrichment import enrich_url
|
from app.utils.enrichment import enrich_url
|
||||||
from app.utils.settings import get_settings
|
from app.utils.settings import get_settings
|
||||||
|
from app.logging_setup import get_app_logger
|
||||||
|
|
||||||
# Load settings once for constants / defaults
|
# Load settings once for constants / defaults
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
|
|
||||||
|
logger = get_app_logger()
|
||||||
|
|
||||||
class Browser:
|
class Browser:
|
||||||
"""
|
"""
|
||||||
@@ -280,7 +282,7 @@ class Browser:
|
|||||||
except Exception as rule_exc:
|
except Exception as rule_exc:
|
||||||
# Be defensive—bad rule shouldn't break the form pass
|
# Be defensive—bad rule shouldn't break the form pass
|
||||||
try:
|
try:
|
||||||
self.logger.debug("Form rule error", extra={"rule": getattr(r, "name", "?"), "error": str(rule_exc)})
|
logger.debug("Form rule error", extra={"rule": getattr(r, "name", "?"), "error": str(rule_exc)})
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
continue
|
continue
|
||||||
@@ -298,7 +300,7 @@ class Browser:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
# Keep analysis resilient
|
# Keep analysis resilient
|
||||||
try:
|
try:
|
||||||
self.logger.error("Form analysis error", extra={"error": str(exc)})
|
logger.error("Form analysis error", extra={"error": str(exc)})
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
results.append({
|
results.append({
|
||||||
@@ -390,7 +392,7 @@ class Browser:
|
|||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
# Fetcher / Orchestrator
|
# Fetcher / Orchestrator
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
async def fetch_page_artifacts(self, url: str) -> Dict[str, Any]:
|
async def fetch_page_artifacts(self, url: str, fetch_ssl_enabled:bool=False) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Fetch page artifacts and save them in a UUID-based directory for this Browser's storage_dir.
|
Fetch page artifacts and save them in a UUID-based directory for this Browser's storage_dir.
|
||||||
|
|
||||||
@@ -476,7 +478,7 @@ class Browser:
|
|||||||
suspicious_scripts = self.analyze_scripts(html_content, base_url=final_url)
|
suspicious_scripts = self.analyze_scripts(html_content, base_url=final_url)
|
||||||
|
|
||||||
# Enrichment
|
# Enrichment
|
||||||
enrichment = enrich_url(url)
|
enrichment = enrich_url(url, fetch_ssl_enabled)
|
||||||
|
|
||||||
# Global PASS/FAIL table per category (entire document)
|
# Global PASS/FAIL table per category (entire document)
|
||||||
rule_checks_overview = self.build_rule_checks_overview(html_content)
|
rule_checks_overview = self.build_rule_checks_overview(html_content)
|
||||||
@@ -505,7 +507,7 @@ class Browser:
|
|||||||
safe_write(results_path, json.dumps(result, indent=2, ensure_ascii=False))
|
safe_write(results_path, json.dumps(result, indent=2, ensure_ascii=False))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_app.logger.info(f"[browser] Saved results.json for run {run_uuid}")
|
logger.info(f"Saved results.json for run {run_uuid}")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -1,19 +1,25 @@
|
|||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import json
|
||||||
import whois
|
import whois
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from ipaddress import ip_address
|
from ipaddress import ip_address
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
# Optional: high-accuracy root-domain detection if available (tldextract is in the requirements, but this is still useful)
|
||||||
|
try:
|
||||||
|
import tldextract
|
||||||
|
_HAS_TLDEXTRACT = True
|
||||||
|
except Exception:
|
||||||
|
_HAS_TLDEXTRACT = False
|
||||||
|
|
||||||
# Local imports
|
# Local imports
|
||||||
from app.utils.cache_db import get_cache
|
from app.utils.cache_db import get_cache
|
||||||
from app.utils.settings import get_settings
|
from app.utils.settings import get_settings
|
||||||
|
from app.utils.tls_probe import TLSEnumerator
|
||||||
|
|
||||||
# Configure logging
|
# Configure logger
|
||||||
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
from app.logging_setup import get_app_logger
|
||||||
|
|
||||||
# Init cache
|
# Init cache
|
||||||
cache = get_cache("/data/cache.db")
|
cache = get_cache("/data/cache.db")
|
||||||
@@ -25,32 +31,244 @@ days = 24 * 60
|
|||||||
GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days
|
GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days
|
||||||
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days
|
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days
|
||||||
|
|
||||||
def enrich_url(url: str) -> dict:
|
logger = get_app_logger()
|
||||||
"""Perform WHOIS, GeoIP, and BEC word enrichment."""
|
|
||||||
result = {}
|
|
||||||
|
|
||||||
|
def parse_target_to_host(target):
    """
    Convert a user-supplied string (URL or bare domain) into a hostname.

    A scheme is only honoured when it sits at the very start of the input.
    A "://" that appears later (for example inside a query string such as
    "example.com/?next=http://other") does not stop the default "http://"
    prefix from being applied.

    Args:
        target: URL, "host[:port][/path]" string, or None. Other values are
            converted with str().

    Returns:
        str or None: Lower-cased hostname without port or credentials, or
        None when the input is None/blank, when no hostname can be
        extracted, or when urlparse rejects the input (e.g. an unterminated
        IPv6 literal).
    """
    if target is None:
        return None

    value = str(target).strip()
    if not value:
        return None

    # urlparse only treats the first token as netloc when a scheme is present,
    # so decide whether the text really *starts* with "<scheme>://".
    scheme, separator, _ = value.partition("://")
    has_scheme = (
        bool(separator)
        and scheme[:1].isalpha()
        and all(ch.isalnum() or ch in "+-." for ch in scheme)
    )
    candidate = value if has_scheme else f"http://{value}"

    try:
        # .hostname drops the port and userinfo, e.g. "localhost:8080/path".
        host = urlparse(candidate).hostname
    except ValueError:
        # Malformed netloc such as "http://[::1"
        return None

    if host is None:
        return None

    # Lowercase for consistency
    host = host.strip().lower()
    if not host:
        return None

    return host
|
||||||
|
|
||||||
|
def get_root_domain(hostname):
    """
    Determine the registrable/root domain from a hostname.

    Prefers tldextract (Public Suffix List aware) when it was importable;
    otherwise falls back to a "last two labels" heuristic.

    Examples:
        sub.a.example.com -> example.com
        api.example.co.uk -> example.co.uk with tldextract
                             (the heuristic alone would give co.uk)
        localhost         -> localhost
        192.0.2.10        -> 192.0.2.10 (IP literals are returned unchanged)

    Args:
        hostname: Hostname string, or None.

    Returns:
        str or None: Best-effort registrable domain, lower-cased. The
        lower-cased hostname itself is returned when no registrable domain
        can be derived (single label, IP literal, bare public suffix).
        None when hostname is None.
    """
    if hostname is None:
        return None

    if _HAS_TLDEXTRACT:
        # tldextract splits into subdomain / domain / suffix using PSL rules,
        # e.g. subdomain="api", domain="example", suffix="co.uk".
        parts = tldextract.extract(hostname)
        # Require both pieces: an empty suffix means e.g. "localhost" or an IP,
        # and an empty domain means the hostname is itself a public suffix;
        # joining in that case would produce a leading-dot value like ".co.uk".
        if parts.domain and parts.suffix:
            return f"{parts.domain}.{parts.suffix}".lower()
        return hostname.lower()

    # Heuristic path. IP literals have no registrable domain; without this
    # guard "192.0.2.10" would be reduced to "2.10".
    try:
        ip_address(hostname.strip("[]"))
    except ValueError:
        pass
    else:
        return hostname.lower()

    # Last two labels: not correct for multi-part suffixes such as "co.uk",
    # but a reasonable default when no PSL data is available.
    labels = [label for label in hostname.split(".") if label]
    if len(labels) >= 2:
        return f"{labels[-2]}.{labels[-1]}".lower()

    return hostname.lower()
|
||||||
|
|
||||||
|
def is_root_domain(hostname):
    """
    Report whether a hostname equals its own registrable/root domain.

    Args:
        hostname: Hostname string, or None.

    Returns:
        bool: True when the case-insensitive hostname matches the value
        returned by get_root_domain(); False for None input or when no
        root domain is returned.
    """
    # Only consult get_root_domain() for a real hostname.
    registrable = get_root_domain(hostname) if hostname is not None else None
    return registrable is not None and hostname.lower() == registrable.lower()
|
||||||
|
|
||||||
|
def search_certs(domain, wildcard=True, expired=True, deduplicate=True, timeout=30.0):
    """
    Search crt.sh for certificates matching the given domain.

    Args:
        domain: Domain to search for.
        wildcard: Prepend the crt.sh wildcard ("%.") to the domain unless the
            domain already contains a "%" (default: True).
        expired: Include expired certificates (default: True). When False the
            query asks crt.sh to exclude them.
        deduplicate: Ask crt.sh to de-duplicate results (default: True).
        timeout: Seconds passed to requests as the request timeout
            (default: 30.0). Bounds the call so a stalled crt.sh cannot hang
            the caller indefinitely.

    Returns:
        list or None: Parsed JSON from crt.sh, a list of objects like:

        {
            "issuer_ca_id": 16418,
            "issuer_name": "C=US, O=Let's Encrypt, CN=Let's Encrypt Authority X3",
            "name_value": "hatch.uber.com",
            "min_cert_id": 325717795,
            "min_entry_timestamp": "2018-02-08T16:47:39.089",
            "not_before": "2018-02-08T15:47:39"
        }

        None when the request fails, crt.sh answers with a non-OK status, or
        the body cannot be decoded/parsed. Failures are logged, not raised.
    """
    if wildcard and "%" not in domain:
        domain = "%.{}".format(domain)

    # Let requests build the query string so the "%" wildcard is sent
    # percent-encoded instead of as a malformed escape sequence.
    params = {"q": domain, "output": "json"}
    if not expired:
        params["exclude"] = "expired"
    if deduplicate:
        params["deduplicate"] = "Y"

    ua = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'
    try:
        req = requests.get(
            "https://crt.sh/",
            params=params,
            headers={'User-Agent': ua},
            timeout=timeout,
        )
    except requests.RequestException as err:
        logger.error("Error retrieving cert information from CRT.sh: %s", err)
        return None

    if not req.ok:
        logger.warning("crt.sh returned HTTP %s for %s", req.status_code, domain)
        return None

    try:
        content = req.content.decode('utf-8')
    except UnicodeDecodeError as err:
        logger.error("Error retrieving cert information from CRT.sh: %s", err)
        return None

    try:
        return json.loads(content)
    except ValueError:
        # Fall through to the legacy repair below.
        pass

    try:
        # Older crt.sh responses were concatenated objects rather than a JSON
        # array. crt.sh fixed this, so it should rarely be needed anymore:
        # https://github.com/crtsh/certwatch_db/commit/f4f46ea37c23543c4cdf1a3c8867d68967641807
        return json.loads("[{}]".format(content.replace('}{', '},{')))
    except ValueError as err:
        logger.error("Error retrieving cert information from CRT.sh: %s", err)
        return None
|
||||||
|
|
||||||
|
def gather_crtsh_certs_for_target(target):
    """
    Collect crt.sh results for a URL or domain-like input.

    Two lookups may be made, both excluding expired certificates:
      - the exact hostname parsed from the input
      - when that hostname is not its own root domain, the wildcard for the
        root domain (e.g. *.example.com)

    The lookup runs regardless of the URL scheme. Any exception is logged
    and whatever was gathered up to that point is still returned.

    Returns:
        dict:
        {
            "input": <original target>,
            "hostname": <parsed hostname or None>,
            "root_domain": <registrable domain or None>,
            "is_root_domain": <bool>,
            "crtsh": {
                "host_certs": [... or None],
                "wildcard_root_certs": [... or None]
            }
        }
    """
    cert_lists = {
        "host_certs": None,
        "wildcard_root_certs": None
    }
    report = {
        "input": target,
        "hostname": None,
        "root_domain": None,
        "is_root_domain": False,
        "crtsh": cert_lists
    }

    try:
        host = parse_target_to_host(target)
        report["hostname"] = host

        if host is not None:
            registrable = get_root_domain(host)
            report["root_domain"] = registrable
            report["is_root_domain"] = is_root_domain(host)

            # Exact-host lookup always runs; expired=False filters expired certs.
            cert_lists["host_certs"] = search_certs(host, wildcard=False, expired=False)

            # Subdomain: additionally query the wildcard on the root (*.root).
            if registrable and not report["is_root_domain"]:
                cert_lists["wildcard_root_certs"] = search_certs(
                    registrable, wildcard=True, expired=False
                )

    except Exception as exc:
        logger.exception("crt.sh enrichment failed: %s", exc)

    return report
|
||||||
|
|
||||||
|
def enrich_url(url: str, fetch_ssl_enabled:bool=False) -> dict:
|
||||||
|
"""Perform WHOIS, GeoIP"""
|
||||||
|
enrichment = {}
|
||||||
|
|
||||||
# Extract hostname
|
# Extract hostname
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
hostname = parsed.hostname or url # fallback if parsing fails
|
hostname = parsed.hostname or url # fallback if parsing fails
|
||||||
|
|
||||||
# --- WHOIS ---
|
# --- WHOIS ---
|
||||||
result.update(enrich_whois(hostname))
|
enrichment.update(enrich_whois(hostname))
|
||||||
|
|
||||||
# --- GeoIP ---
|
# --- GeoIP ---
|
||||||
result["geoip"] = enrich_geoip(hostname)
|
enrichment["geoip"] = enrich_geoip(hostname)
|
||||||
|
|
||||||
return result
|
# === SSL/TLS: crt.sh + live probe ===
|
||||||
|
# if fetching ssl...
|
||||||
|
if fetch_ssl_enabled:
|
||||||
|
try:
|
||||||
|
# 1) Certificate Transparency (already implemented previously)
|
||||||
|
crtsh_info = gather_crtsh_certs_for_target(url)
|
||||||
|
|
||||||
|
# 2) Live TLS probe (versions + negotiated cipher per version)
|
||||||
|
tls_enum = TLSEnumerator(timeout_seconds=5.0)
|
||||||
|
probe_result = tls_enum.probe(url)
|
||||||
|
|
||||||
|
enrichment["ssl_tls"] = {}
|
||||||
|
enrichment["ssl_tls"]["crtsh"] = crtsh_info
|
||||||
|
enrichment["ssl_tls"]["probe"] = probe_result.to_dict()
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("SSL/TLS enrichment failed: %s", exc)
|
||||||
|
enrichment["ssl_tls"] = {"error": "SSL/TLS enrichment failed"}
|
||||||
|
else:
|
||||||
|
# Include a small marker so the UI can show “skipped”
|
||||||
|
enrichment["ssl_tls"] = {"skipped": True, "reason": "Disabled on submission"}
|
||||||
|
|
||||||
|
return enrichment
|
||||||
|
|
||||||
def enrich_whois(hostname: str) -> dict:
|
def enrich_whois(hostname: str) -> dict:
|
||||||
"""Fetch WHOIS info using python-whois with safe type handling."""
|
"""Fetch WHOIS info using python-whois with safe type handling."""
|
||||||
cache_key = f"whois:{hostname}"
|
cache_key = f"whois:{hostname}"
|
||||||
cached = cache.read(cache_key)
|
cached = cache.read(cache_key)
|
||||||
if cached:
|
if cached:
|
||||||
logging.info(f"[CACHE HIT] for WHOIS: {hostname}")
|
logger.info(f"[CACHE HIT] for WHOIS: {hostname}")
|
||||||
return cached
|
return cached
|
||||||
|
|
||||||
logging.info(f"[CACHE MISS] for WHOIS: {hostname}")
|
logger.info(f"[CACHE MISS] for WHOIS: {hostname}")
|
||||||
result = {}
|
result = {}
|
||||||
try:
|
try:
|
||||||
w = whois.whois(hostname)
|
w = whois.whois(hostname)
|
||||||
@@ -73,7 +291,7 @@ def enrich_whois(hostname: str) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"WHOIS lookup failed for {hostname}: {e}")
|
logger.warning(f"WHOIS lookup failed for {hostname}: {e}")
|
||||||
try:
|
try:
|
||||||
# fallback raw whois text
|
# fallback raw whois text
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -81,14 +299,13 @@ def enrich_whois(hostname: str) -> dict:
|
|||||||
result["whois"] = {}
|
result["whois"] = {}
|
||||||
result["raw_whois"] = raw_output
|
result["raw_whois"] = raw_output
|
||||||
except Exception as raw_e:
|
except Exception as raw_e:
|
||||||
logging.error(f"Raw WHOIS also failed: {raw_e}")
|
logger.error(f"Raw WHOIS also failed: {raw_e}")
|
||||||
result["whois"] = {}
|
result["whois"] = {}
|
||||||
result["raw_whois"] = "N/A"
|
result["raw_whois"] = "N/A"
|
||||||
|
|
||||||
cache.create(cache_key, result, WHOIS_DEFAULT_TTL)
|
cache.create(cache_key, result, WHOIS_DEFAULT_TTL)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def enrich_geoip(hostname: str) -> dict:
|
def enrich_geoip(hostname: str) -> dict:
|
||||||
"""Resolve hostname to IPs and fetch info from ip-api.com."""
|
"""Resolve hostname to IPs and fetch info from ip-api.com."""
|
||||||
geo_info = {}
|
geo_info = {}
|
||||||
@@ -98,11 +315,11 @@ def enrich_geoip(hostname: str) -> dict:
|
|||||||
cache_key = f"geoip:{ip_str}"
|
cache_key = f"geoip:{ip_str}"
|
||||||
cached = cache.read(cache_key)
|
cached = cache.read(cache_key)
|
||||||
if cached:
|
if cached:
|
||||||
logging.info(f"[CACHE HIT] for GEOIP: {ip}")
|
logger.info(f"[CACHE HIT] for GEOIP: {ip}")
|
||||||
geo_info[ip_str] = cached
|
geo_info[ip_str] = cached
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logging.info(f"[CACHE MISS] for GEOIP: {ip}")
|
logger.info(f"[CACHE MISS] for GEOIP: {ip}")
|
||||||
try:
|
try:
|
||||||
resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5)
|
resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
@@ -116,7 +333,6 @@ def enrich_geoip(hostname: str) -> dict:
|
|||||||
|
|
||||||
return geo_info
|
return geo_info
|
||||||
|
|
||||||
|
|
||||||
def extract_ips_from_url(hostname: str):
|
def extract_ips_from_url(hostname: str):
|
||||||
"""Resolve hostname to IPs."""
|
"""Resolve hostname to IPs."""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
from app.logging_setup import get_app_logger
|
||||||
|
|
||||||
|
logger = get_app_logger()
|
||||||
|
|
||||||
def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
|
def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
|
||||||
"""Write content to a file safely with logging."""
|
"""Write content to a file safely with logging."""
|
||||||
@@ -12,9 +13,9 @@ def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
|
|||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(path, mode, encoding=encoding) as f:
|
with open(path, mode, encoding=encoding) as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
logging.info(f"[+] Wrote file: {path}")
|
logger.info(f"[+] Wrote file: {path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"[!] Failed writing {path}: {e}")
|
logger.error(f"[!] Failed writing {path}: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
|
def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ class AppConfig:
|
|||||||
name: str = "MyApp"
|
name: str = "MyApp"
|
||||||
version_major: int = 1
|
version_major: int = 1
|
||||||
version_minor: int = 0
|
version_minor: int = 0
|
||||||
|
print_rule_loads: bool = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
270
app/utils/tls_probe.py
Normal file
270
app/utils/tls_probe.py
Normal file
@@ -0,0 +1,270 @@
|
|||||||
|
import socket
|
||||||
|
import ssl
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class TLSProbeResult:
    """
    Holds the outcome of probing one endpoint across TLS protocol versions.

    Attributes are plain, mutable fields that start empty and are filled in
    by whoever runs the probe.
    """

    def __init__(self):
        # Target endpoint; port defaults to 443.
        self.hostname = None
        self.port = 443

        # Per-version outcome keyed by label such as "TLS1.2".
        # NOTE(review): the results template reads "supported",
        # "selected_cipher", "handshake_seconds" and "error" from each entry;
        # confirm the exact keys against the code that fills this dict.
        self.results_by_version = {}

        # Flagged findings, e.g. ["TLS1.0", "TLS1.1"] and ["RC4-SHA"].
        self.weak_protocols = []
        self.weak_ciphers = []

        # Free-text errors encountered while probing.
        self.errors = []

    def to_dict(self):
        """
        Return the fields as a dictionary suitable for serialization.

        The containers are returned as-is (not copied).
        """
        return {
            "hostname": self.hostname,
            "port": self.port,
            "results_by_version": self.results_by_version,
            "weak_protocols": self.weak_protocols,
            "weak_ciphers": self.weak_ciphers,
            "errors": self.errors,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class TLSEnumerator:
    """
    Enumerate supported TLS versions for a server by attempting handshakes with constrained contexts.
    Also collects the server-selected cipher for each successful handshake.

    Notes:
    - We do NOT validate certificates; this is posture discovery, not trust verification.
    - Cipher enumeration is limited to "what was negotiated with default cipher list" per version.
      Deep cipher scanning (per-cipher attempts) can be added later if needed.
    """

    # Port assumed whenever the target does not carry a usable one
    DEFAULT_PORT = 443

    # Probe order (ascending) paired with the ssl.TLSVersion member name for each label
    _VERSION_MEMBERS = (
        ("TLS1.0", "TLSv1"),
        ("TLS1.1", "TLSv1_1"),
        ("TLS1.2", "TLSv1_2"),
        ("TLS1.3", "TLSv1_3"),
    )

    # RFC 8996 and industry guidance deprecate TLS 1.0 and 1.1
    _LEGACY_PROTOCOLS = ("TLS1.0", "TLS1.1")

    # Substrings (upper-case) that mark known-weak cipher families. Expand as needed.
    _WEAK_CIPHER_MARKERS = ("RC4", "3DES", "DES-CBC3", "NULL", "EXPORT", "EXP-", "-MD5")

    def __init__(self, timeout_seconds=5.0):
        """
        Args:
            timeout_seconds: socket timeout applied to the TCP connect and the TLS handshake.
        """
        self.timeout_seconds = float(timeout_seconds)

    def _build_context_for_version(self, tls_version_label):
        """
        Build an SSLContext that only allows the specified TLS version.

        Returns None when the label is unknown or when the local Python/OpenSSL
        build refuses to pin the context to that version.
        """
        member_name = dict(self._VERSION_MEMBERS).get(tls_version_label)
        pinned_version = getattr(ssl.TLSVersion, member_name, None) if member_name else None
        if pinned_version is None:
            return None

        context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)

        # Disable certificate checks so we can probe misconfigured/self-signed endpoints.
        # check_hostname must be cleared before verify_mode can be set to CERT_NONE.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

        # Constrain to a single protocol version using minimum/maximum.
        # The enum member existing does not guarantee the linked OpenSSL accepts it:
        # the setters raise ValueError for versions the library cannot use.
        try:
            context.minimum_version = pinned_version
            context.maximum_version = pinned_version
        except (ValueError, ssl.SSLError):
            return None

        # Keep default cipher list; we only want to see what is negotiated.
        # NOTE(review): with a raised OpenSSL security level, TLS 1.0/1.1 handshakes may
        # fail locally even if the server supports them - confirm whether
        # context.set_ciphers("DEFAULT:@SECLEVEL=0") is wanted for legacy probes.
        return context

    def _attempt_handshake(self, hostname, port, context):
        """
        Attempt a TLS handshake to (hostname, port) using the given context.

        Returns a tuple:
            (supported(bool), selected_cipher(str or None), elapsed_seconds(float or None), error(str or None))
        Never raises; any failure is reported through the error element.
        """
        sock = None
        tls_sock = None
        try:
            # create_connection handles IPv4/IPv6 resolution; its timeout also bounds the handshake
            sock = socket.create_connection((hostname, port), timeout=self.timeout_seconds)

            # Time only the TLS wrap, using a monotonic clock so wall-clock
            # adjustments cannot produce negative or inflated durations
            started = time.perf_counter()
            # SNI is important: pass server_hostname
            tls_sock = context.wrap_socket(sock, server_hostname=hostname)
            elapsed = time.perf_counter() - started

            # cipher() returns (cipher_name, protocol, secret_bits) or None
            cipher_info = tls_sock.cipher()
            selected_cipher = str(cipher_info[0]) if cipher_info else None
            return True, selected_cipher, elapsed, None
        except Exception as exc:
            # Deliberately broad: every failure mode (DNS, refused, timeout, alert)
            # is diagnostic data for the caller rather than an error to propagate
            return False, None, None, f"{type(exc).__name__}: {str(exc)}"
        finally:
            # Best-effort cleanup; a failing close must not mask the probe outcome
            for handle in (tls_sock, sock):
                if handle is None:
                    continue
                try:
                    handle.close()
                except OSError:
                    pass

    @staticmethod
    def _is_weak_cipher(cipher_name):
        """Return True when the cipher name contains a known-weak family marker."""
        name_upper = str(cipher_name).upper()
        return any(marker in name_upper for marker in TLSEnumerator._WEAK_CIPHER_MARKERS)

    @staticmethod
    def _coerce_port(value, default=443):
        """Return value as an int port in 0-65535, or default when it is not one."""
        try:
            port = int(str(value).strip())
        except (TypeError, ValueError):
            return default
        if 0 <= port <= 65535:
            return port
        return default

    def probe(self, target):
        """
        Probe the target (URL or hostname or 'hostname:port') for TLS 1.0/1.1/1.2/1.3 support.
        Returns TLSProbeResult.
        """
        result = TLSProbeResult()
        host, port = self._parse_target_to_host_port(target)
        result.hostname = host
        result.port = port

        if host is None:
            result.errors.append("Unable to parse a hostname from the target.")
            return result

        for version_label, _member_name in self._VERSION_MEMBERS:
            context = self._build_context_for_version(version_label)

            if context is None:
                # This Python/OpenSSL cannot restrict to this version
                supported, cipher, elapsed, err = False, None, None, "Version not supported by local runtime"
            else:
                supported, cipher, elapsed, err = self._attempt_handshake(host, port, context)

            result.results_by_version[version_label] = {
                "supported": supported,
                "selected_cipher": cipher,
                "handshake_seconds": elapsed,
                "error": err
            }

            if supported is not True:
                continue

            # A successful handshake on a legacy version is itself a finding
            if version_label in self._LEGACY_PROTOCOLS:
                result.weak_protocols.append(version_label)

            # Heuristic: we only see the single chosen cipher per version
            if cipher is not None and self._is_weak_cipher(cipher):
                if cipher not in result.weak_ciphers:
                    result.weak_ciphers.append(cipher)

        return result

    def _parse_target_to_host_port(self, target):
        """
        Accepts URL, hostname, 'hostname:port' or '[ipv6]:port' and returns (hostname, port).

        Defaults to port 443 if the port is missing, non-numeric or out of range.
        Returns (None, 443) when no hostname can be extracted. Never raises.
        """
        default_port = self.DEFAULT_PORT
        if target is None:
            return None, default_port

        text = str(target).strip()
        if text == "":
            return None, default_port

        # Only treat the text as a URL when what precedes "://" is a plausible scheme;
        # a "://" that appears later (e.g. inside a query string) must not count
        scheme, separator, _rest = text.partition("://")
        has_scheme = (
            bool(separator)
            and scheme[:1].isalpha()
            and all(ch.isalnum() or ch in "+-." for ch in scheme)
        )

        if has_scheme:
            try:
                parsed = urlparse(text)
                hostname = parsed.hostname
            except ValueError:
                # e.g. malformed bracketed IPv6 literal
                return None, default_port
            if not hostname:
                return None, default_port
            try:
                # .port raises ValueError for non-numeric or out-of-range ports
                port = parsed.port
            except ValueError:
                port = None
            if port is None:
                port = default_port
            return hostname.lower(), int(port)

        # No scheme: keep only the authority part (drop any path/query/fragment)
        authority = text
        for delimiter in "/?#":
            authority = authority.split(delimiter, 1)[0]
        authority = authority.strip()

        if authority.startswith("["):
            # Bracketed IPv6 literal, optionally followed by :port
            host_part, closing, remainder = authority[1:].partition("]")
            if not closing:
                return None, default_port
            port_part = remainder[1:] if remainder.startswith(":") else ""
        elif authority.count(":") == 1:
            host_part, _colon, port_part = authority.partition(":")
        else:
            # Bare hostname, or an unbracketed IPv6 literal (several colons, no port)
            host_part, port_part = authority, ""

        host_part = host_part.strip()
        if host_part == "":
            return None, default_port

        return host_part.lower(), self._coerce_port(port_part, default_port)
|
||||||
133
app/utils/url_tools.py
Normal file
133
app/utils/url_tools.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
# app/utils/url_tools.py
|
||||||
|
from urllib.parse import urlparse, urlunparse
|
||||||
|
import requests
|
||||||
|
import idna
|
||||||
|
|
||||||
|
# Reuse existing decorator (import from wherever you defined it)
|
||||||
|
|
||||||
|
from app.utils.settings import singleton_loader
|
||||||
|
|
||||||
|
|
||||||
|
class URLNormalizer:
    """
    Normalize user input into a fully-qualified URL for analysis.

    Behavior:
    - If no scheme is present, prepend https:// by default.
    - Optional quick HTTPS reachability check with fallback to http://.
    - Converts Unicode hostnames to punycode via IDNA.
    - IPv6 literal hosts are kept in bracketed form.

    Notes:
    - Keep the first-constructed configuration stable via the singleton factory.
    - Avoids Flask/current_app/threading per your project style.
    """

    def __init__(self, prefer_https: bool = True, fallback_http: bool = False, connect_timeout: float = 2.0):
        """
        Args:
            prefer_https: prepend https:// (rather than http://) to scheme-less input.
            fallback_http: when preferring HTTPS, probe it first and use http:// if the probe fails.
            connect_timeout: timeout in seconds handed to the reachability probe.
        """
        self.prefer_https = bool(prefer_https)
        self.fallback_http = bool(fallback_http)
        self.connect_timeout = float(connect_timeout)

    @staticmethod
    def _has_explicit_scheme(text: str) -> bool:
        """Return True when text starts with a syntactically valid '<scheme>://' prefix."""
        scheme, separator, _rest = text.partition("://")
        if not separator or not scheme[:1].isalpha():
            return False
        return all(ch.isalnum() or ch in "+-." for ch in scheme)

    def normalize_for_analysis(self, raw_input: str) -> str:
        """
        Convert raw input (URL or domain) into a normalized URL string.

        Raises:
            ValueError: if input is empty/invalid (including a malformed port or
                IPv6 literal, as reported by urllib.parse).
        """
        if raw_input is None:
            raise ValueError("Empty input")

        text = str(raw_input).strip()
        if text == "":
            raise ValueError("Empty input")

        # Repair common typos (missing colon)
        lower = text.lower()
        if lower.startswith("http//"):
            text = "http://" + text[6:]
        elif lower.startswith("https//"):
            text = "https://" + text[7:]

        # Respect an existing scheme. A "://" that only appears later in the text
        # (e.g. inside a query string) does not count as one.
        if self._has_explicit_scheme(text):
            return self._recompose_with_punycode_host(urlparse(text))

        # No scheme -> build one
        if not self.prefer_https:
            return self._recompose_with_punycode_host(urlparse("http://" + text))

        https_url = "https://" + text
        if self.fallback_http and not self._quick_https_ok(https_url):
            return self._recompose_with_punycode_host(urlparse("http://" + text))
        return self._recompose_with_punycode_host(urlparse(https_url))

    def _recompose_with_punycode_host(self, parsed):
        """
        Recompose a parsed URL with hostname encoded to ASCII (punycode).
        Preserves userinfo, port, path, params, query, fragment.

        Raises:
            ValueError: if the port in the parsed URL is not a valid port number.
        """
        host = parsed.hostname
        if host is None:
            return urlunparse(parsed)

        if ":" in host:
            # .hostname strips the brackets from IPv6 literals; they must be put
            # back or the following ":port" becomes ambiguous
            ascii_host = f"[{host}]"
        else:
            try:
                ascii_host = idna.encode(host).decode("ascii")
            except Exception:
                # Best effort: hosts IDNA rejects (underscores, IPv4, ...) pass through unchanged
                ascii_host = host

        # rebuild netloc (auth + port)
        netloc = ascii_host
        port = parsed.port
        if port is not None:
            netloc = f"{netloc}:{port}"

        # Copy userinfo verbatim so "user", "user:pw" and ":pw" forms all survive
        userinfo, at_sign, _hostport = parsed.netloc.rpartition("@")
        if at_sign and userinfo:
            netloc = f"{userinfo}@{netloc}"

        return urlunparse((
            parsed.scheme,
            netloc,
            parsed.path or "",
            parsed.params or "",
            parsed.query or "",
            parsed.fragment or "",
        ))

    def _quick_https_ok(self, https_url: str) -> bool:
        """
        Quick reachability check for https:// using a HEAD request.
        Redirects allowed; TLS verify disabled — posture-only.

        Returns True when any HTTP response is received, False on any failure.
        """
        try:
            resp = requests.head(https_url, allow_redirects=True, timeout=self.connect_timeout, verify=False)
            # Any status code counts as reachable; release the connection promptly
            resp.close()
            return True
        except Exception:
            # Deliberately broad: every failure simply means "do not use HTTPS"
            return False
|
||||||
|
|
||||||
|
|
||||||
|
# ---- Singleton factory using our decorator ----
|
||||||
|
@singleton_loader
def get_url_normalizer(
    prefer_https: bool = True,
    fallback_http: bool = False,
    connect_timeout: float = 2.0,
) -> URLNormalizer:
    """
    Build the URLNormalizer that the singleton decorator hands out.

    The three arguments are forwarded unchanged to the URLNormalizer constructor.

    IMPORTANT: the decorator (defined elsewhere) is expected to cache the first
    result, so the FIRST call's arguments "win" and later calls get the cached
    instance regardless of the arguments they pass.
    """
    normalizer = URLNormalizer(
        prefer_https=prefer_https,
        fallback_http=fallback_http,
        connect_timeout=connect_timeout,
    )
    return normalizer
|
||||||
11
app/wsgi.py
11
app/wsgi.py
@@ -8,3 +8,14 @@ from . import create_app
|
|||||||
|
|
||||||
# Gunicorn will look for "app"
|
# Gunicorn will look for "app"
|
||||||
app = create_app()
|
app = create_app()
|
||||||
|
|
||||||
|
from app.state import set_rules_engine, get_rules_engine
|
||||||
|
from app.logging_setup import get_app_logger
|
||||||
|
from app.rules.factory import build_rules_engine
|
||||||
|
|
||||||
|
# Preload path: construct the rules engine once at import time and publish it
# through the shared application state
_engine = build_rules_engine()
set_rules_engine(_engine)

# Log which engine instance was published and how many rules it carries
logger = get_app_logger()
logger.info("[wsgi] engine id=%s total=%d", hex(id(_engine)), len(_engine.rules))
|
||||||
|
|||||||
@@ -19,4 +19,5 @@ exec gunicorn \
|
|||||||
--worker-class gthread \
|
--worker-class gthread \
|
||||||
--timeout 300 \
|
--timeout 300 \
|
||||||
--graceful-timeout 300 \
|
--graceful-timeout 300 \
|
||||||
|
--preload \
|
||||||
"app.wsgi:app"
|
"app.wsgi:app"
|
||||||
|
|||||||
@@ -1,14 +1,16 @@
|
|||||||
Flask>=3.0.3
|
Flask>=3.0.3 # flask
|
||||||
Jinja2>=3.1.4
|
Jinja2>=3.1.4 # flask dependency
|
||||||
Werkzeug>=3.0.3
|
Werkzeug>=3.0.3 # flask dependency
|
||||||
itsdangerous>=2.2.0
|
itsdangerous>=2.2.0 # flask dependency
|
||||||
click>=8.1.7
|
click>=8.1.7 # flask dependency
|
||||||
lxml>=5.3.0
|
idna # IDNA/punycode hostname encoding (used by URLNormalizer)
|
||||||
|
gunicorn>=22.0.0 # Flask Production server
|
||||||
|
lxml>=5.3.0 # xml parsing
|
||||||
playwright==1.45.0 # Playwright stack
|
playwright==1.45.0 # Playwright stack
|
||||||
beautifulsoup4>=4.12.3 # HTML parsing, etc.
|
beautifulsoup4>=4.12.3 # HTML parsing, etc.
|
||||||
gunicorn>=22.0.0 # Production server
|
|
||||||
python-whois # For WHOIS lookups
|
python-whois # For WHOIS lookups
|
||||||
geoip2 # MaxMind GeoLite2 database for IP geolocation
|
# geoip2 # MaxMind GeoLite2 database for IP geolocation
|
||||||
dnspython # For DNS lookups, including A/AAAA records
|
dnspython # For DNS lookups, including A/AAAA records
|
||||||
ipwhois
|
ipwhois # IP parsing
|
||||||
PyYAML
|
PyYAML # ability to load yaml files
|
||||||
|
tldextract # for TLD extraction to find root domain things
|
||||||
Reference in New Issue
Block a user