diff --git a/app/__init__.py b/app/__init__.py
index 87b30cb..eb45184 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -2,15 +2,19 @@ import os
import logging
from pathlib import Path
from flask import Flask
+from datetime import datetime
# Local imports
from app.utils.settings import get_settings
from app.logging_setup import wire_logging_once, get_app_logger
+from app.app_settings import AppSettings
from app.blueprints.main import bp as main_bp # ui blueprint
from app.blueprints.api import api_bp as api_bp # api blueprint
from app.blueprints.roadmap import bp as roadmap_bp # roadmap
+
+
def create_app() -> Flask:
"""
Create and configure the Flask application instance.
@@ -34,17 +38,36 @@ def create_app() -> Flask:
if not app.secret_key:
app_logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")
- # Configure storage directory (bind-mount is still handled by sandbox.sh)
- sandbox_storage_default = Path("/data")
- app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
+
+ # version
+ version = f"v{AppSettings.version_major}.{AppSettings.version_minor}"
+
+ # allow a custom branded name; when it differs from our own name, the footer credits ours as "powered by"
+ branded_name = settings.branding.name
+ if branded_name == AppSettings.name:
+ public_name = AppSettings.name
+ footer = f"{AppSettings.copyright} {public_name} {version} - {AppSettings.tagline}"
+ else:
+ public_name = f"{branded_name}"
+ link = f'{AppSettings.name}'
+ footer = f"{AppSettings.copyright} {public_name} powered by {link} {version} - {AppSettings.tagline}"
+
+ # web header / footer
+ header = f"{public_name}"
# App metadata available to templates
- app.config["APP_NAME"] = settings.app.name
- app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"
+ app.config["APP_NAME"] = public_name
+ app.config["APP_VERSION"] = version
+ app.config["WEB_HEADER"] = header
+ app.config["WEB_FOOTER"] = footer
# roadmap file
app.config["ROADMAP_FILE"] = str(Path(app.root_path) / "docs" / "roadmap.yaml")
+ # Configure storage directory (bind-mount is still handled by sandbox.sh)
+ sandbox_storage_default = Path("/data")
+ app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
+
# Register blueprints
app.register_blueprint(main_bp)
diff --git a/app/app_settings.py b/app/app_settings.py
new file mode 100644
index 0000000..bbc6fc6
--- /dev/null
+++ b/app/app_settings.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+from datetime import datetime
+
+this_year = datetime.strftime(datetime.now(),"%Y")  # current year as a 4-digit string; evaluated once, when this module is imported
+# Built-in application identity. Callers read these defaults directly as class attributes (e.g. AppSettings.name).
+@dataclass
+class AppSettings:
+ name: str = "SneakyScope"  # canonical product name; create_app() compares settings.branding.name against it
+ tagline: str = "A selfhosted URL Sandbox"  # short description appended to the web footer
+ url: str = "https://git.sneakygeek.net/ptarrant/SneakyScope"  # presumably the project's source repository - TODO confirm where this is rendered
+ copyright: str = f"© 2025 - {this_year}"  # footer prefix; the end year is fixed at import, so it only changes after a process restart
+ version_major: int = 1  # rendered as "v{version_major}.{version_minor}"
+ version_minor: int = 0  # see version_major
\ No newline at end of file
diff --git a/app/blueprints/main.py b/app/blueprints/main.py
index 383e41d..8c93255 100644
--- a/app/blueprints/main.py
+++ b/app/blueprints/main.py
@@ -1,6 +1,5 @@
# app/blueprints/ui.py
-import os
import json
import asyncio
from pathlib import Path
@@ -9,18 +8,16 @@ from flask import Blueprint, render_template, request, redirect, url_for, flash,
from app.utils.url_tools import get_url_normalizer
from app.utils.browser import get_browser
-from app.utils.enrichment import enrich_url
from app.utils.settings import get_settings
from app.utils.io_helpers import get_recent_results
from app.logging_setup import get_app_logger
+
app_logger = get_app_logger()
bp = Blueprint("main", __name__)
settings = get_settings()
-app_name = settings.app.name
-app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
# --- data cleaner for tls to ensure data is standardized
@@ -68,9 +65,9 @@ def normalize_ssl_tls_for_view(ssl_tls):
def inject_app_info():
"""Inject app name and version into all templates."""
return {
- "app_name": app_name,
- "app_version": app_version,
- "current_year": datetime.strftime(datetime.now(),"%Y")
+ "app_name": current_app.config.get("APP_NAME", "SneakyScope"),
+ "header": current_app.config.get("WEB_HEADER", "SneakyScope"),
+ "footer": current_app.config.get("WEB_FOOTER", "SneakyScope"),
}
@bp.route("/", methods=["GET"])
@@ -115,7 +112,7 @@ def analyze():
app_logger.warning("Empty or invalid URL input")
return redirect(url_for("index"))
- app_logger.info(f"[*] Analyzing URL{target}")
+ app_logger.info(f"[*] Analyzing URL {target}")
app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")
if not target:
diff --git a/app/config/settings.yaml b/app/config/settings.yaml
index 5bccd29..9b0c79d 100644
--- a/app/config/settings.yaml
+++ b/app/config/settings.yaml
@@ -1,8 +1,8 @@
-app:
- name: SneakyScope
- version_major: 0
- version_minor: 1
+branding:
+ # you can brand your sandbox to anything you want
+ name: Redwire Sandbox
+logging:
# logs when rules are loaded
log_rule_loads: False
@@ -13,15 +13,34 @@ app:
log_rule_debug: False
cache:
+ # number of recent runs shown on front page
recent_runs_count: 10
+
+ # how long to cache whois information
whois_cache_days: 7
+
+ # how long to cache geoip information
geoip_cache_days: 7
+ # should we cache crt.sh certificate lookups?
+ crt_cache_enabled: True
+
+ # how long to cache certificate information (if above is true)
+ crt_cache_days: 7
+
external_script_fetch:
+ # enable ability to pull external scripts
enabled: True
+
+ # max total size (in MB) of external scripts to fetch, when fetching is enabled
max_total_mb: 5
+
+ # max time (in ms) to wait for an external script fetch, when fetching is enabled
max_time_ms: 3000
+
+ # max redirects to follow for an external script fetch, when fetching is enabled
max_redirects: 3
ui:
+ # how many characters to show in a snippet preview in the GUI
snippet_preview_len: 300
diff --git a/app/rules/factory.py b/app/rules/factory.py
index 688b2c0..d77ccf1 100644
--- a/app/rules/factory.py
+++ b/app/rules/factory.py
@@ -34,18 +34,33 @@ def build_rules_engine() -> RuleEngine:
def add(rule: Rule):
eng.add_rule(rule)
- add(Rule("form_action_missing", "Form has no action attribute", "form", "function",
- FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter, rule_name="form_action_missing")))
- add(Rule("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", "function",
- FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page")))
- add(Rule("form_submits_to_different_host", "Form submits to a different host", "form", "function",
- FunctionRuleAdapter(form_submits_to_different_host, category="form", adapter=adapter, rule_name="form_submits_to_different_host")))
- add(Rule("script_src_uses_data_or_blob", "Script src uses data:/blob: URL", "script", "function",
- FunctionRuleAdapter(script_src_uses_data_or_blob, category="script", adapter=adapter, rule_name="script_src_uses_data_or_blob")))
- add(Rule("script_src_has_dangerous_extension", "External script with dangerous extension", "script", "function",
- FunctionRuleAdapter(script_src_has_dangerous_extension, category="script", adapter=adapter, rule_name="script_src_has_dangerous_extension")))
- add(Rule("script_third_party_host", "Script is from a third-party host", "script", "function",
- FunctionRuleAdapter(script_third_party_host, category="script", adapter=adapter, rule_name="script_third_party_host")))
+ # Form no action
+ add(Rule(
+ name="form_action_missing",
+ description="Form has no action attribute",
+ category="form",
+ rule_type="function",
+ function=FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter, rule_name="form_action_missing"),
+ ))
+
+ # add(Rule(
+ # name="form_http_on_https_page",
+ # description="Form submits via HTTP from HTTPS page",
+ # category="form",
+ # rule_type="function",
+ # function=FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page"),
+ # ))
+
+ # add(Rule("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", "function",
+ # FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page")))
+ # add(Rule("form_submits_to_different_host", "Form submits to a different host", "form", "function",
+ # FunctionRuleAdapter(form_submits_to_different_host, category="form", adapter=adapter, rule_name="form_submits_to_different_host")))
+ # add(Rule("script_src_uses_data_or_blob", "Script src uses data:/blob: URL", "script", "function",
+ # FunctionRuleAdapter(script_src_uses_data_or_blob, category="script", adapter=adapter, rule_name="script_src_uses_data_or_blob")))
+ # add(Rule("script_src_has_dangerous_extension", "External script with dangerous extension", "script", "function",
+ # FunctionRuleAdapter(script_src_has_dangerous_extension, category="script", adapter=adapter, rule_name="script_src_has_dangerous_extension")))
+ # add(Rule("script_third_party_host", "Script is from a third-party host", "script", "function",
+ # FunctionRuleAdapter(script_third_party_host, category="script", adapter=adapter, rule_name="script_third_party_host")))
log.info("Registered %d total rules (YAML + function)", len(eng.rules))
return eng
diff --git a/app/rules/rules_engine.py b/app/rules/rules_engine.py
index 12b2d2d..90af932 100644
--- a/app/rules/rules_engine.py
+++ b/app/rules/rules_engine.py
@@ -143,6 +143,13 @@ class Rule:
return False, "No match"
if self.rule_type == "function":
+ if not callable(self.function):
+ logger.warning(
+ "[Rule] '%s' function is not callable (type=%s, value=%r)",
+ self.name, type(self.function).__name__, self.function
+ )
+ return False, "Invalid rule configuration: function not callable"
+
if callable(self.function):
try:
matched, reason = self.function(text)
@@ -255,7 +262,7 @@ class RuleEngine:
)
return
- if settings.app.log_rule_loads:
+ if settings.logconfig.log_rule_loads:
logger.info(
"[engine] add_rule: %s/%s replace=%s -> count=%d",
rule.category, rule.name, bool(replace), len(self._rules)
@@ -308,7 +315,7 @@ class RuleEngine:
"""
# --- dispatch visibility --- if set to true, we log applied categories
- if getattr(settings.app, "log_rule_dispatch", False):
+ if getattr(settings.logconfig, "log_rule_dispatch", False):
all_cats = [r.category for r in self._rules]
cat_counts = Counter(all_cats)
# Which categories are being applied this run?
diff --git a/app/templates/base.html b/app/templates/base.html
index eefba8f..b66e0c9 100644
--- a/app/templates/base.html
+++ b/app/templates/base.html
@@ -20,7 +20,7 @@
- SneakyScope
+ {{ header }}
{# Desktop nav #}
@@ -76,7 +76,7 @@
{# Footer #}
{# Flowbite JS (enables collapse) #}
diff --git a/app/templates/index.html b/app/templates/index.html
index ab6c9f7..a815218 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -110,7 +110,7 @@
-
Analyzing website…
+
Analyzing website…
If you are pulling certificates, this may take a long time
@@ -142,11 +142,7 @@ function hideSpinner() {
}
/**
- * Initialize form submit handling:
- * - shows overlay spinner
- * - disables submit button
- * - shows small spinner inside button
- * - lets the browser continue with POST
+ * Initialize form submit handling.
*/
(function initAnalyzeForm() {
const form = document.getElementById('analyze-form');
@@ -155,11 +151,16 @@ function hideSpinner() {
const submitBtn = form.querySelector('button[type="submit"]');
const btnSpinner = document.getElementById('btn-spinner');
- // Hide spinner overlay if arriving from bfcache/back
- window.addEventListener('pageshow', () => {
- hideSpinner();
- if (submitBtn) submitBtn.disabled = false;
- if (btnSpinner) btnSpinner.classList.add('hidden');
+ // Only hide the overlay when returning via BFCache (back/forward)
+ window.addEventListener('pageshow', (e) => {
+ const nav = performance.getEntriesByType('navigation')[0];
+ const isBFCache = e.persisted || nav?.type === 'back_forward';
+
+ if (isBFCache) {
+ hideSpinner();
+ if (submitBtn) submitBtn.disabled = false;
+ if (btnSpinner) btnSpinner.classList.add('hidden');
+ }
});
form.addEventListener('submit', (e) => {
diff --git a/app/templates/partials/result_enrichment.html b/app/templates/partials/result_enrichment.html
index f399c23..3f6fc64 100644
--- a/app/templates/partials/result_enrichment.html
+++ b/app/templates/partials/result_enrichment.html
@@ -33,7 +33,12 @@
GeoIP
{% for ip, info in enrichment.geoip.items() %}
- {{ ip }}
+
+ {{ ip }} -
+ {% if info.country %} {{ info.country }} {% endif %} -
+ {% if info.isp %} {{ info.isp }} {% endif %}
+
+
diff --git a/app/templates/partials/result_forms.html b/app/templates/partials/result_forms.html
index d34f81f..f0e9543 100644
--- a/app/templates/partials/result_forms.html
+++ b/app/templates/partials/result_forms.html
@@ -1,6 +1,6 @@
- Forms
+ Suspicious Form Hits
{% if forms and forms|length > 0 %}
diff --git a/app/templates/partials/result_text.html b/app/templates/partials/result_text.html
index a1e54f4..f5fae21 100644
--- a/app/templates/partials/result_text.html
+++ b/app/templates/partials/result_text.html
@@ -1,6 +1,6 @@
- Text
+ Suspicious Text
{% if suspicious_text and suspicious_text|length > 0 %}
diff --git a/app/utils/enrichment.py b/app/utils/enrichment.py
index 70b0f4d..ad57c67 100644
--- a/app/utils/enrichment.py
+++ b/app/utils/enrichment.py
@@ -30,6 +30,8 @@ days = 24 * 60
GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days
+CRT_DEFAULT_TTL = settings.cache.crt_cache_days * days
+
logger = get_app_logger()
@@ -137,6 +139,20 @@ def search_certs(domain, wildcard=True, expired=True, deduplicate=True):
"not_before": "2018-02-08T15:47:39"
}
"""
+
+ cache_key = f"crt_cert:{domain}"
+
+ # log if caching is turned on or not
+ logger.info(f"CRT Cache is set to: {settings.cache.crt_cache_enabled}")
+
+ if settings.cache.crt_cache_enabled:
+ cached = cache.read(cache_key)
+ if cached:
+ logger.info(f"[CACHE HIT] for CRT Cert: {domain}")
+ return cached
+ else:
+ logger.info(f"[CACHE MISS] for CRT Cert: {domain} - {cache_key}")
+
base_url = "https://crt.sh/?q={}&output=json"
if not expired:
base_url = base_url + "&exclude=expired"
@@ -153,11 +169,21 @@ def search_certs(domain, wildcard=True, expired=True, deduplicate=True):
try:
content = req.content.decode('utf-8')
data = json.loads(content)
+ # if caching
+ if settings.cache.crt_cache_enabled:
+ logger.info(f"Setting Cache for {cache_key}")
+ cache.create(cache_key, data, CRT_DEFAULT_TTL)
return data
except ValueError:
# crt.sh fixed their JSON response. This shouldn't be necessary anymore
# https://github.com/crtsh/certwatch_db/commit/f4f46ea37c23543c4cdf1a3c8867d68967641807
data = json.loads("[{}]".format(content.replace('}{', '},{')))
+
+ # if caching
+ if settings.cache.crt_cache_enabled:
+ logger.info(f"Setting Cache for {cache_key}")
+ cache.create(cache_key, data, CRT_DEFAULT_TTL)
+
return data
except Exception as err:
logger.error("Error retrieving cert information from CRT.sh.")
@@ -200,6 +226,7 @@ def gather_crtsh_certs_for_target(target):
hostname = parse_target_to_host(target)
result["hostname"] = hostname
+ # return early, skipping the crt.sh queries, if no hostname could be parsed
if hostname is None:
return result
@@ -209,6 +236,7 @@ def gather_crtsh_certs_for_target(target):
# Always query crt.sh for the specific hostname
# (expired=False means we filter expired)
+
host_certs = search_certs(hostname, wildcard=False, expired=False)
result["crtsh"]["host_certs"] = host_certs
diff --git a/app/utils/settings.py b/app/utils/settings.py
index 1f5acd7..7c05476 100644
--- a/app/utils/settings.py
+++ b/app/utils/settings.py
@@ -53,27 +53,34 @@ class UIConfig:
@dataclass
class Cache_Config:
+ recent_runs_count: int = 10
+
whois_cache_days: int = 7
geoip_cache_days: int = 7
- recent_runs_count: int = 10
+ crt_cache_enabled: bool = True
+ crt_cache_days: int = 7
+
+
@dataclass
-class AppConfig:
- name: str = "MyApp"
- version_major: int = 1
- version_minor: int = 0
+class Logging_Config:
log_rule_loads: bool = False
log_rule_dispatch: bool = False
log_rule_debug: bool = False
+@dataclass
+class BrandingConfig:
+ name: str = "MyApp"
+
@dataclass
class Settings:
cache: Cache_Config = field(default_factory=Cache_Config)
ui: UIConfig = field(default_factory=UIConfig)
external_fetch: External_FetchConfig = field(default_factory=External_FetchConfig)
- app: AppConfig = field(default_factory=AppConfig)
+ branding: BrandingConfig = field(default_factory=BrandingConfig)
+ logconfig: Logging_Config = field(default_factory=Logging_Config)
@classmethod
def from_yaml(cls, path: str | Path) -> "Settings":
diff --git a/app/wsgi.py b/app/wsgi.py
index 35f38ba..f0a0733 100644
--- a/app/wsgi.py
+++ b/app/wsgi.py
@@ -9,7 +9,7 @@ from . import create_app
# Gunicorn will look for "app"
app = create_app()
-from app.state import set_rules_engine, get_rules_engine
+from app.state import set_rules_engine
from app.logging_setup import get_app_logger
from app.rules.factory import build_rules_engine
diff --git a/docs/rule_processing_notes.md b/docs/rule_processing_notes.md
new file mode 100644
index 0000000..d0081f0
--- /dev/null
+++ b/docs/rule_processing_notes.md
@@ -0,0 +1,24 @@
+Blueprint -> Analyse calls browser.fetch_page_artifacts
+
+Browser.py -> fetch_page_artifacts
+grabs html_content
+ run analyse_forms on html_content
+ run analyse_scripts on html_content
+ enrich_url
+ build_rule_checks_overview
+
+builds the structured dict for return
+dumps data to disk
+
+
+text flag mapping for rules
+# --- Helper: map human-friendly flags to re.* constants ---
+ FLAG_MAP = {
+ "i": _re.IGNORECASE, "ignorecase": _re.IGNORECASE,
+ "m": _re.MULTILINE, "multiline": _re.MULTILINE,
+ "s": _re.DOTALL, "dotall": _re.DOTALL, "singleline": _re.DOTALL,
+ "x": _re.VERBOSE, "verbose": _re.VERBOSE,
+ "a": _re.ASCII, "ascii": _re.ASCII,
+ "u": _re.UNICODE, "unicode": _re.UNICODE,
+ "l": _re.LOCALE, "locale": _re.LOCALE,
+ }
\ No newline at end of file
diff --git a/docs/rules-rebuild-remember.md b/docs/rules-rebuild-remember.md
new file mode 100644
index 0000000..5349412
--- /dev/null
+++ b/docs/rules-rebuild-remember.md
@@ -0,0 +1,113 @@
+This is a structured outline to keep as a “memory primer” for resuming this work (whether continuing the current engine or pivoting to a SaaS build). Think of it as a **checkpoint map** of what has been covered and which directions were flagged.
+
+---
+
+# SneakyScope – Rules Engine Refactor & SaaS Considerations (Outline)
+
+## 1. Current State
+
+* Engine: `app/rules/rules_engine.py` — mixed YAML + code rules, `(bool, str)` return shape expected.
+* Issue: function-based rules returning `(bool, dict)` → caused invalid type warnings.
+* Stopgap: `FunctionRuleAdapter` converted returns.
+* Desire: Remove YAML entirely; rules defined in code only.
+
+---
+
+## 2. Agreed Direction
+
+* **Rule definition approach**:
+
+ * Option B chosen → decorator-based registration.
+ * Every rule defined in `app/rules/` as Python functions.
+ * Rules register with metadata (`name`, `category`, `severity`, etc.).
+
+* **Return shape**:
+
+ * Always return a **Result dict** (no adapter needed).
+ * Engine enforces schema and fills in defaults.
+
+* **Engine relocation**:
+
+ * Move to `app/utils/rules_engine/`.
+ * Responsibilities: load, validate, freeze registry, run rules, aggregate results, log/report.
+
+---
+
+## 3. Result Schema (concept)
+
+* **Per RuleResult**
+
+ * Required: `ok: bool`, `message: str`.
+ * Identity: `name`, `category`, `severity`, `tags`, `rule_version`.
+ * Detail: `data: object|null`.
+ * Timing: `duration_ms`.
+ * Errors: structured `error` object if exceptions occur.
+ * Provenance: `source_module`, optional `policy` snapshot.
+
+* **Per AnalysisResult (run-level envelope)**
+
+ * Input scope: target URL, category, content hash, facts profile.
+ * Provenance: run\_id, engine\_version, ruleset\_checksum, timestamp, duration.
+ * Results: array of RuleResults.
+ * Summary: counts by severity, match count, errors, first match, top severity.
+ * Artifacts: references (screenshot, DOM snapshot, etc.).
+ * Policy snapshot: optional central policy/overrides.
+
+---
+
+## 4. Operational Standards
+
+* **Determinism**: identical inputs + ruleset\_checksum → identical results.
+* **Message stability**: avoid wording churn; expand via `data`.
+* **Size limits**: `message ≤ 256 chars`; `data ≤ 8–16 KB`.
+* **Errors**: `ok=false` if error present; always emit `message`.
+* **Severity**: rule sets default; policy may override.
+* **Tags**: controlled vocabulary; additive.
+
+---
+
+## 5. Migration Plan
+
+1. Create new `rules_engine` package in `app/utils/`.
+2. Add decorator/registry for rules.
+3. Port all rules from YAML → Python modules grouped by category.
+4. Delete YAML loader + adapters.
+5. Update call sites to build `facts` and call `engine.run(...)`.
+6. Add CI tests:
+
+ * Schema compliance.
+ * No duplicates.
+ * Ruleset checksum snapshot.
+7. Integration tests with real fixtures.
+8. Benchmark & harden (caps on input size, rule runtime).
+
+---
+
+## 6. SaaS Expansion (future)
+
+* **Multi-tenancy**: separate org/user scopes for data and rule runs.
+* **RBAC**: roles (admin, analyst, viewer).
+* **Compliance**: logging, retention, export, audit trails.
+* **Rules**: centrally maintained, not user-editable.
+* **APIs**: authenticated endpoints, per-user quotas.
+* **Observability**: per-tenant metrics, alerts.
+* **Security**: sandboxing, strict module allowlists, compliance with SOC2/ISO.
+* **Data controls**: PII redaction, encryption, retention policies.
+
+---
+
+## 7. Future-Proofing Hooks
+
+* Versioning: ruleset checksum + per-rule versions.
+* Extensibility: support `actions`, `links`, `evidence` in Result.
+* Policy: central config for thresholds/overrides.
+* Hot reload (optional, dev-only).
+* Rule provenance tracking (source\_module, commit SHA).
+
+---
+
+✅ This outline is enough to “re-hydrate” the context later — you won’t need to dig back into old logs to remember why `(bool, str)` didn’t fit, why YAML was removed, or what schema we were converging on.
+
+---
+
+Open question: should this also be saved in a **short “README-spec” style** (like `RESULTS.md`) so it can live in the repo as the contract doc for rules, or stay as a personal checkpoint outline?