feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across
`form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Document the foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
- `app/utils/urltools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox
Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
This commit is contained in:
116
app/__init__.py
116
app/__init__.py
@@ -5,26 +5,11 @@ from flask import Flask
|
||||
|
||||
# Local imports
|
||||
from .utils.settings import get_settings
|
||||
from .utils.rules_engine import RuleEngine, load_rules_from_yaml, Rule
|
||||
|
||||
# our code based rules
|
||||
from .rules.function_rules import (
|
||||
FactAdapter,
|
||||
FunctionRuleAdapter,
|
||||
script_src_uses_data_or_blob,
|
||||
script_src_has_dangerous_extension,
|
||||
script_third_party_host,
|
||||
form_submits_to_different_host,
|
||||
form_http_on_https_page,
|
||||
form_action_missing,
|
||||
)
|
||||
from .logging_setup import wire_logging_once, get_app_logger, get_engine_logger
|
||||
|
||||
from app.blueprints import ui # ui blueprint
|
||||
from app.blueprints import api # api blueprint
|
||||
|
||||
# from .utils import io_helpers # if need logging/setup later
|
||||
# from .utils import cache_db # available for future injections
|
||||
|
||||
def create_app() -> Flask:
|
||||
"""
|
||||
Create and configure the Flask application instance.
|
||||
@@ -35,103 +20,23 @@ def create_app() -> Flask:
|
||||
# Basic app object
|
||||
app = Flask(__name__, template_folder="templates", static_folder="static")
|
||||
|
||||
# logging setup
|
||||
wire_logging_once(app)
|
||||
|
||||
app_logger = get_app_logger()
|
||||
|
||||
# Load settings (safe fallback to defaults if file missing)
|
||||
settings = get_settings()
|
||||
|
||||
# Secret key loaded from env (warn if missing)
|
||||
app.secret_key = os.getenv("SECRET_KEY")
|
||||
if not app.secret_key:
|
||||
app.logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")
|
||||
app_logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")
|
||||
|
||||
# Configure storage directory (bind-mount is still handled by sandbox.sh)
|
||||
sandbox_storage_default = Path("/data")
|
||||
app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
|
||||
|
||||
# ---------------------------
|
||||
# Suspicious Rules Engine
|
||||
# ---------------------------
|
||||
|
||||
# Determine rules file path relative to this package (allow env override)
|
||||
base_dir = Path(__file__).resolve().parent
|
||||
default_rules_path = base_dir / "config" / "suspicious_rules.yaml"
|
||||
rules_path_str = os.getenv("SNEAKYSCOPE_RULES_FILE", str(default_rules_path))
|
||||
rules_path = Path(rules_path_str)
|
||||
|
||||
# Create engine bound to Flask logger so all verbose/debug goes to app.logger
|
||||
engine = RuleEngine(rules=[], logger=app.logger)
|
||||
|
||||
# Try to load from YAML if present; log clearly if not
|
||||
if rules_path.exists():
|
||||
try:
|
||||
loaded_rules = load_rules_from_yaml(rules_path, logger=app.logger)
|
||||
# Add rules one-by-one (explicit, clearer logs if any rule fails to compile)
|
||||
index = 0
|
||||
total = len(loaded_rules)
|
||||
while index < total:
|
||||
engine.add_rule(loaded_rules[index])
|
||||
index = index + 1
|
||||
app.logger.info(f"[init] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"[init] Failed loading rules from {rules_path}: {e}")
|
||||
else:
|
||||
app.logger.warning(f"[init] Rules file not found at {rules_path}. Engine will start with zero rules.")
|
||||
|
||||
# Built-in function-based rules
|
||||
adapter = FactAdapter(logger=app.logger)
|
||||
|
||||
engine.add_rule(Rule(
|
||||
name="form_action_missing",
|
||||
description="Form has no action attribute",
|
||||
category="form",
|
||||
rule_type="function",
|
||||
function=FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter),
|
||||
))
|
||||
|
||||
engine.add_rule(Rule(
|
||||
name="form_http_on_https_page",
|
||||
description="Form submits via HTTP from HTTPS page",
|
||||
category="form",
|
||||
rule_type="function",
|
||||
function=FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter),
|
||||
))
|
||||
|
||||
engine.add_rule(Rule(
|
||||
name="form_submits_to_different_host",
|
||||
description="Form submits to a different host",
|
||||
category="form",
|
||||
rule_type="function",
|
||||
function=FunctionRuleAdapter(form_submits_to_different_host, category="form", adapter=adapter),
|
||||
))
|
||||
|
||||
# Script rules expect dict 'facts' (you’ll wire per-script facts later)
|
||||
engine.add_rule(Rule(
|
||||
name="script_src_uses_data_or_blob",
|
||||
description="Script src uses data:/blob: URL",
|
||||
category="script",
|
||||
rule_type="function",
|
||||
function=FunctionRuleAdapter(script_src_uses_data_or_blob, category="script", adapter=adapter),
|
||||
))
|
||||
|
||||
engine.add_rule(Rule(
|
||||
name="script_src_has_dangerous_extension",
|
||||
description="External script with dangerous extension",
|
||||
category="script",
|
||||
rule_type="function",
|
||||
function=FunctionRuleAdapter(script_src_has_dangerous_extension, category="script", adapter=adapter),
|
||||
))
|
||||
|
||||
engine.add_rule(Rule(
|
||||
name="script_third_party_host",
|
||||
description="Script is from a third-party host",
|
||||
category="script",
|
||||
rule_type="function",
|
||||
function=FunctionRuleAdapter(script_third_party_host, category="script", adapter=adapter),
|
||||
))
|
||||
|
||||
# Store engine both ways: attribute (convenient) and config
|
||||
app.rule_engine = engine
|
||||
app.config["RULE_ENGINE"] = engine
|
||||
|
||||
# App metadata available to templates
|
||||
app.config["APP_NAME"] = settings.app.name
|
||||
app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"
|
||||
@@ -140,9 +45,10 @@ def create_app() -> Flask:
|
||||
app.register_blueprint(ui.bp)
|
||||
app.register_blueprint(api.api_bp)
|
||||
|
||||
app_logger = get_app_logger()
|
||||
|
||||
# Example log lines so we know we booted cleanly
|
||||
app.logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
|
||||
app.logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")
|
||||
app.logger.info(f"Registered {len(engine.rules)} total rules (YAML + function)")
|
||||
app_logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
|
||||
app_logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")
|
||||
|
||||
return app
|
||||
|
||||
Reference in New Issue
Block a user