feat(engine,ui): unify detection in rules engine, add function rules & per-script matches; improve scripts table UX

Core changes
- Centralize detection in the Rules Engine; browser.py now focuses on fetch/extract/persist.
- Add class-based adapters:
  - FactAdapter: converts snippets → structured facts.
  - FunctionRuleAdapter: wraps dict-based rule functions for engine input (str or dict).
- Register function rules (code-based) alongside YAML rules:
  - form_action_missing
  - form_http_on_https_page
  - form_submits_to_different_host
  - script_src_uses_data_or_blob
  - script_src_has_dangerous_extension
  - script_third_party_host

Rules & YAML
- Expand/normalize YAML rules with severities + tags; tighten patterns.
- Add new regex rules: new_function_usage, unescape_usage, string_timer_usage, long_hex_constants.
- Move iframe rule to `text` category.
- Keep existing script/form/text rules; all compile under IGNORECASE.

Browser / analysis refactor
- browser.py:
  - Remove inline heuristics; rely on engine for PASS/FAIL, reason, severity, tags.
  - Build page-level overview (`rule_checks`) across categories.
  - Analyze forms: add `base_url` + `base_hostname` to snippet so function rules can evaluate; include per-form rule_checks.
  - Analyze scripts: **per-script evaluation**:
    - Inline -> run regex script rules on inline text.
    - External -> run function script rules with a facts dict (src/src_hostname/base_url/base_hostname).
    - Only include scripts that matched ≥1 rule; attach severity/tags to matches.
  - Persist single source of truth: `/data/<uuid>/results.json`.
  - Backward-compat: `fetch_page_artifacts(..., engine=...)` kwarg accepted/ignored.

UI/UX
- Suspicious Scripts table now shows only matched scripts.
- Add severity badges and tag chips; tooltips show rule description.
- Prevent table blowouts:
  - Fixed layout + ellipsis + wrapping helpers (`.scripts-table`, `.breakable`, `details pre.code`).
  - Shortened inline snippet preview (configurable).
- Minor template niceties (e.g., rel="noopener" on external links where applicable).

Config
- Add `ui.snippet_preview_len` to settings.yaml; default 160.
- Load into `app.config["SNIPPET_PREVIEW_LEN"]` and use in `analyze_scripts`.

Init / wiring
- Import and register function rules as `Rule(...)` objects (not dicts).
- Hook Rules Engine to Flask logger for verbose/diagnostic output.
- Log totals on startup; keep YAML path override via `SNEAKYSCOPE_RULES_FILE`.

Bug fixes
- Fix boot crash: pass `Rule` instances to `engine.add_rule()` instead of dicts.
- Fix “N/A” in scripts table by actually computing per-script matches.
- Ensure form rules fire by including `base_url`/`base_hostname` in form snippets.

Roadmap
- Update roadmap to reflect completed items:
  - “Show each check and whether it triggered (pass/fail list per rule)”
  - Severity levels + tags in Suspicious Scripts
  - Results.json as route source of truth
  - Scripts table UX (badges, tooltips, layout fix)
2025-08-20 21:33:30 -05:00
parent 70d29f9f95
commit 1eb2a52f17
14 changed files with 1108 additions and 423 deletions
--- a/app/init.py
+++ b/app/init.py
@@ -1,16 +1,3 @@
-"""
-app/__init__.py
-
-Application factory and startup hooks for SneakyScope.
-
-Responsibilities:
- Create the Flask app.
- Load settings (YAML -> dataclasses) with safe defaults.
- Initialize and load the Suspicious Rules Engine from YAML.
- Register blueprints (routes).
- Configure core paths (e.g., SANDBOX_STORAGE).
-"""
-
 import os
 import logging
 from pathlib import Path
@@ -18,11 +5,24 @@ from flask import Flask

 # Local imports
 from .utils.settings import get_settings
-from .utils import io_helpers  # if you need logging/setup later
-from .utils import cache_db     # available for future injections
-from .utils.rules_engine import RuleEngine, load_rules_from_yaml  # rules engine
+from .utils.rules_engine import RuleEngine, load_rules_from_yaml, Rule
+
+# Our code-based rules
+from .rules.function_rules import (
+    FactAdapter,
+    FunctionRuleAdapter,
+    script_src_uses_data_or_blob,
+    script_src_has_dangerous_extension,
+    script_third_party_host,
+    form_submits_to_different_host,
+    form_http_on_https_page,
+    form_action_missing,
+)
+
 from . import routes  # blueprint

+# from .utils import io_helpers  # if needed for logging/setup later
+# from .utils import cache_db     # available for future injections

 def create_app() -> Flask:
    """
@@ -37,46 +37,110 @@ def create_app() -> Flask:
    # Load settings (safe fallback to defaults if file missing)
    settings = get_settings()

-    # Secret key loaded from env
+    # Secret key loaded from env (warn if missing)
    app.secret_key = os.getenv("SECRET_KEY")
+    if not app.secret_key:
+        app.logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")

    # Configure storage directory (bind-mount is still handled by sandbox.sh)
    sandbox_storage_default = Path("/data")
    app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)

-    # Initialize Suspicious Rules Engine at startup
-    # Determine rules file path relative to this package
-    base_dir = Path(__file__).resolve().parent
-    rules_path = base_dir / "config" / "suspicious_rules.yaml"
+    # ---------------------------
+    # Suspicious Rules Engine
+    # ---------------------------

-    # Create an engine instance (even if file missing, we still want an engine)
-    engine = RuleEngine()
+    # Determine rules file path relative to this package (allow env override)
+    base_dir = Path(__file__).resolve().parent
+    default_rules_path = base_dir / "config" / "suspicious_rules.yaml"
+    rules_path_str = os.getenv("SNEAKYSCOPE_RULES_FILE", str(default_rules_path))
+    rules_path = Path(rules_path_str)
+
+    # Create engine bound to Flask logger so all verbose/debug goes to app.logger
+    engine = RuleEngine(rules=[], logger=app.logger)

    # Try to load from YAML if present; log clearly if not
    if rules_path.exists():
        try:
-            loaded_rules = load_rules_from_yaml(rules_path)
-            # Add rules one-by-one (explicit)
-            for rule in loaded_rules:
-                engine.add_rule(rule)
-            app.logger.info(f"[+] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
+            loaded_rules = load_rules_from_yaml(rules_path, logger=app.logger)
+            # Add rules one-by-one (explicit, clearer logs if any rule fails to compile)
+            index = 0
+            total = len(loaded_rules)
+            while index < total:
+                engine.add_rule(loaded_rules[index])
+                index = index + 1
+            app.logger.info(f"[init] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
        except Exception as e:
-            app.logger.warning(f"[!] Failed loading rules from {rules_path}: {e}")
+            app.logger.warning(f"[init] Failed loading rules from {rules_path}: {e}")
    else:
-        app.logger.warning(f"[!] Rules file not found at {rules_path}. Engine will start with zero rules.")
+        app.logger.warning(f"[init] Rules file not found at {rules_path}. Engine will start with zero rules.")

-    # Store engine on app config so it is accessible via current_app
+    # Built-in function-based rules
+    adapter = FactAdapter(logger=app.logger)
+
+    engine.add_rule(Rule(
+        name="form_action_missing",
+        description="Form has no action attribute",
+        category="form",
+        rule_type="function",
+        function=FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter),
+    ))
+
+    engine.add_rule(Rule(
+        name="form_http_on_https_page",
+        description="Form submits via HTTP from HTTPS page",
+        category="form",
+        rule_type="function",
+        function=FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter),
+    ))
+
+    engine.add_rule(Rule(
+        name="form_submits_to_different_host",
+        description="Form submits to a different host",
+        category="form",
+        rule_type="function",
+        function=FunctionRuleAdapter(form_submits_to_different_host, category="form", adapter=adapter),
+    ))
+
+    # Script rules expect a dict of per-script facts (src/src_hostname/base_url/base_hostname)
+    engine.add_rule(Rule(
+        name="script_src_uses_data_or_blob",
+        description="Script src uses data:/blob: URL",
+        category="script",
+        rule_type="function",
+        function=FunctionRuleAdapter(script_src_uses_data_or_blob, category="script", adapter=adapter),
+    ))
+
+    engine.add_rule(Rule(
+        name="script_src_has_dangerous_extension",
+        description="External script with dangerous extension",
+        category="script",
+        rule_type="function",
+        function=FunctionRuleAdapter(script_src_has_dangerous_extension, category="script", adapter=adapter),
+    ))
+
+    engine.add_rule(Rule(
+        name="script_third_party_host",
+        description="Script is from a third-party host",
+        category="script",
+        rule_type="function",
+        function=FunctionRuleAdapter(script_third_party_host, category="script", adapter=adapter),
+    ))
+
+    # Store engine both ways: attribute (convenient) and config
+    app.rule_engine = engine
    app.config["RULE_ENGINE"] = engine

-    # Make app name/version available for templates here if you want it globally
+    # App metadata available to templates
    app.config["APP_NAME"] = settings.app.name
    app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"

    # Register blueprints
    app.register_blueprint(routes.bp)

-    # Example log line so we know we booted cleanly
+    # Example log lines so we know we booted cleanly
    app.logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
    app.logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")
+    app.logger.info(f"Registered {len(engine.rules)} total rules (YAML + function)")

    return app