Compare commits

...

5 Commits

Author SHA1 Message Date
7a04256f75 adding back in rules 2025-09-03 13:22:08 -05:00
248fce5655 adding changelog to pages/routes 2025-09-03 13:17:33 -05:00
d5cc9df699 added cloudflare detection / badge on results page 2025-09-03 10:11:47 -05:00
b59bf67329 added notes to settings.yaml
moved core app config (name, version) out of settings and into app/app_settings.py
added ability to brand SneakyScope to any name
added caching of cert information from crt.sh (cache enable and length is configurable in settings.yaml)

streamlined header/footer loading to be more correct
2025-08-23 20:37:44 -05:00
5af8513e14 updating roadmap and internal docs 2025-08-23 16:12:03 -05:00
23 changed files with 634 additions and 61 deletions

View File

@@ -43,7 +43,6 @@ SneakyScope fetches a page in a sandbox, enriches with WHOIS/GeoIP, and runs a u
* **Playwright** for headless page fetch/render * **Playwright** for headless page fetch/render
* **BeautifulSoup4** for parsing * **BeautifulSoup4** for parsing
* **Rules Engine** * **Rules Engine**
* YAML regex rules (`config/suspicious_rules.yaml`) * YAML regex rules (`config/suspicious_rules.yaml`)
* Function rules (`app/rules/function_rules.py`) registered on startup * Function rules (`app/rules/function_rules.py`) registered on startup
* **Artifacts**: persistent path mounted at `/data` (configurable) * **Artifacts**: persistent path mounted at `/data` (configurable)

View File

@@ -2,14 +2,19 @@ import os
import logging import logging
from pathlib import Path from pathlib import Path
from flask import Flask from flask import Flask
from datetime import datetime
# Local imports # Local imports
from app.utils.settings import get_settings from app.utils.settings import get_settings
from app.logging_setup import wire_logging_once, get_app_logger from app.logging_setup import wire_logging_once, get_app_logger
from app.app_settings import AppSettings
from app.blueprints.main import bp as main_bp # ui blueprint from app.blueprints.main import bp as main_bp # ui blueprint
from app.blueprints.api import api_bp as api_bp # api blueprint from app.blueprints.api import api_bp as api_bp # api blueprint
from app.blueprints.roadmap import bp as roadmap_bp # roadmap from app.blueprints.roadmap import bp as roadmap_bp # roadmap
from app.blueprints.changelog import bp as changelog_bp # changelog
def create_app() -> Flask: def create_app() -> Flask:
""" """
@@ -34,22 +39,43 @@ def create_app() -> Flask:
if not app.secret_key: if not app.secret_key:
app_logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.") app_logger.warning("[init] SECRET_KEY is not set; sessions may be insecure in production.")
# Configure storage directory (bind-mount is still handled by sandbox.sh)
sandbox_storage_default = Path("/data") # version
app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default) version = f"v{AppSettings.version_major}.{AppSettings.version_minor}"
# allow branding for name if they don't match our name
branded_name = settings.branding.name
if branded_name == AppSettings.name:
public_name = AppSettings.name
footer = f"{AppSettings.copyright} {public_name} {version} - {AppSettings.tagline}"
else:
public_name = f"{branded_name}"
link = f'<a href="{AppSettings.url}" target="_blank">{AppSettings.name}</a>'
footer = f"{AppSettings.copyright} {public_name} powered by {link} {version} - {AppSettings.tagline}"
# web header / footer
header = f"{public_name}"
# App metadata available to templates # App metadata available to templates
app.config["APP_NAME"] = settings.app.name app.config["APP_NAME"] = public_name
app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}" app.config["APP_VERSION"] = version
app.config["WEB_HEADER"] = header
app.config["WEB_FOOTER"] = footer
# roadmap file # roadmap file
app.config["ROADMAP_FILE"] = str(Path(app.root_path) / "docs" / "roadmap.yaml") app.config["ROADMAP_FILE"] = str(Path(app.root_path) / "docs" / "roadmap.yaml")
app.config["CHANGELOG_FILE"] = str(Path(app.root_path) / "docs" / "changelog.yaml")
# Configure storage directory (bind-mount is still handled by sandbox.sh)
sandbox_storage_default = Path("/data")
app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
# Register blueprints # Register blueprints
app.register_blueprint(main_bp) app.register_blueprint(main_bp)
app.register_blueprint(api_bp) app.register_blueprint(api_bp)
app.register_blueprint(roadmap_bp) app.register_blueprint(roadmap_bp)
app.register_blueprint(changelog_bp)
app_logger = get_app_logger() app_logger = get_app_logger()

13
app/app_settings.py Normal file
View File

@@ -0,0 +1,13 @@
from dataclasses import dataclass
from datetime import datetime
# Four-digit current year as a string, captured once when this module is imported.
this_year = datetime.now().strftime("%Y")


@dataclass
class AppSettings:
    """Core application identity that ships with the code.

    Every field has a default, so the values can be read directly off the
    class (``AppSettings.name``) or from an instance.
    """

    # Upstream product name.
    name: str = "SneakyScope"
    # One-line description of the product.
    tagline: str = "A selfhosted URL Sandbox"
    # Project home page.
    url: str = "https://git.sneakygeek.net/ptarrant/SneakyScope"
    # Copyright range; the end year is fixed at import time and does not
    # change until the process is restarted.
    copyright: str = f"© 2025 - {this_year}"
    # Version components.
    version_major: int = 1
    version_minor: int = 0

View File

@@ -0,0 +1,71 @@
# app/services/changelog_loader.py
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional, Dict
import yaml
from flask import Blueprint, current_app, render_template
@dataclass
class ChangeItem:
    """One changelog entry: a headline plus its detail lines."""

    # Short headline for the change.
    title: str
    # Detail lines for the change; may be empty.
    details: List[str]
@dataclass
class VersionLog:
    """Everything recorded in the changelog for one version."""

    # Version label.
    version: str
    # Entries grouped by kind of change.
    features: List[ChangeItem]
    refactors: List[ChangeItem]
    fixes: List[ChangeItem]
    # Plain note lines attached to the version.
    notes: List[str]
@dataclass
class Changelog:
    """Parsed changelog: pending changes plus the per-version history."""

    # Pending changes, keyed by kind of change.
    unreleased: Dict[str, List[ChangeItem]]
    # Version entries, in the order they were loaded.
    versions: List[VersionLog]
def _coerce_items(items: Optional[List[Dict[str, Any]]]) -> List[ChangeItem]:
    """Convert raw parsed change entries into ``ChangeItem`` objects.

    Args:
        items: The parsed list of entries, or ``None`` when the key is
            missing or empty.

    Returns:
        One ``ChangeItem`` per usable entry, in input order. ``None``
        entries are skipped; an entry that is not a mapping (for example a
        bare string) becomes a title-only item.
    """
    out: List[ChangeItem] = []
    for it in items or []:
        if it is None:
            # An empty list element carries no content.
            continue
        if not isinstance(it, dict):
            # Keep the text of a hand-written bare entry instead of failing on `.get`.
            out.append(ChangeItem(title=str(it).strip(), details=[]))
            continue

        raw_title = it.get("title")
        # An explicit null title must not render as the literal string "None".
        title = "" if raw_title is None else str(raw_title).strip()

        raw_details = it.get("details") or []
        if isinstance(raw_details, str):
            # A scalar `details: "text"` would otherwise be split into characters.
            raw_details = [raw_details]
        details = [str(d) for d in raw_details]

        out.append(ChangeItem(title=title, details=details))
    return out
def load_changelog(path: Path) -> Changelog:
    """Load a changelog YAML file and coerce it into dataclasses.

    An empty file, or ``unreleased:`` / ``versions:`` keys present with a
    null value, are treated as "no entries" instead of raising.

    Args:
        path: Location of the YAML file, read as UTF-8.

    Returns:
        A ``Changelog`` whose ``unreleased`` mapping always has the keys
        ``features``, ``refactors`` and ``fixes``, plus one ``VersionLog``
        per entry under ``versions``.

    Raises:
        OSError: If the file cannot be read.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # safe_load returns None for an empty document.
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}

    # `.get(key, default)` does not help when the key exists with a null value.
    pending = data.get("unreleased") or {}
    unreleased = {
        "features": _coerce_items(pending.get("features")),
        "refactors": _coerce_items(pending.get("refactors")),
        "fixes": _coerce_items(pending.get("fixes")),
    }

    versions: List[VersionLog] = [
        VersionLog(
            version=str(v.get("version")),
            features=_coerce_items(v.get("features")),
            refactors=_coerce_items(v.get("refactors")),
            fixes=_coerce_items(v.get("fixes")),
            notes=[str(n) for n in (v.get("notes") or [])],
        )
        for v in (data.get("versions") or [])
    ]

    return Changelog(unreleased=unreleased, versions=versions)
# Blueprint that serves the in-app changelog page.
bp = Blueprint("changelog", __name__)


@bp.route("/changelog")
def view_changelog():
    """Render the changelog page from the configured YAML file.

    Uses ``CHANGELOG_FILE`` from the app config when it is set; otherwise
    falls back to ``changelog.yaml`` in the parent directory of the app root.
    """
    configured = current_app.config.get("CHANGELOG_FILE")
    if configured:
        source = Path(configured)
    else:
        # NOTE(review): this fallback looks one level above the app package,
        # which may differ from where the app factory points CHANGELOG_FILE;
        # confirm the intended default location.
        source = Path(current_app.root_path).parent / "changelog.yaml"

    return render_template("changelog.html", changelog=load_changelog(source))

View File

@@ -1,6 +1,5 @@
# app/blueprints/ui.py # app/blueprints/ui.py
import os
import json import json
import asyncio import asyncio
from pathlib import Path from pathlib import Path
@@ -9,18 +8,16 @@ from flask import Blueprint, render_template, request, redirect, url_for, flash,
from app.utils.url_tools import get_url_normalizer from app.utils.url_tools import get_url_normalizer
from app.utils.browser import get_browser from app.utils.browser import get_browser
from app.utils.enrichment import enrich_url
from app.utils.settings import get_settings from app.utils.settings import get_settings
from app.utils.io_helpers import get_recent_results from app.utils.io_helpers import get_recent_results
from app.logging_setup import get_app_logger from app.logging_setup import get_app_logger
app_logger = get_app_logger() app_logger = get_app_logger()
bp = Blueprint("main", __name__) bp = Blueprint("main", __name__)
settings = get_settings() settings = get_settings()
app_name = settings.app.name
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
# --- data cleaner for tls to ensure data is standardized # --- data cleaner for tls to ensure data is standardized
@@ -68,9 +65,9 @@ def normalize_ssl_tls_for_view(ssl_tls):
def inject_app_info(): def inject_app_info():
"""Inject app name and version into all templates.""" """Inject app name and version into all templates."""
return { return {
"app_name": app_name, "app_name": current_app.config.get("APP_NAME", "SneakyScope"),
"app_version": app_version, "header": current_app.config.get("WEB_HEADER", "SneakyScope"),
"current_year": datetime.strftime(datetime.now(),"%Y") "footer": current_app.config.get("WEB_FOOTER", "SneakyScope"),
} }
@bp.route("/", methods=["GET"]) @bp.route("/", methods=["GET"])
@@ -115,7 +112,7 @@ def analyze():
app_logger.warning("Empty or invalid URL input") app_logger.warning("Empty or invalid URL input")
return redirect(url_for("index")) return redirect(url_for("index"))
app_logger.info(f"[*] Analyzing URL{target}") app_logger.info(f"[*] Analyzing URL {target}")
app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}") app_logger.info(f"[*] SSL Checks set to {fetch_ssl_enabled}")
if not target: if not target:

View File

@@ -1,8 +1,8 @@
app: branding:
name: SneakyScope # you can brand your sandbox to anything you want
version_major: 0 name: Redwire Sandbox
version_minor: 1
logging:
# logs when rules are loaded # logs when rules are loaded
log_rule_loads: False log_rule_loads: False
@@ -13,15 +13,34 @@ app:
log_rule_debug: False log_rule_debug: False
cache: cache:
# number of recent runs shown on front page
recent_runs_count: 10 recent_runs_count: 10
# how long to cache whois information
whois_cache_days: 7 whois_cache_days: 7
# how long to cache geoip information
geoip_cache_days: 7 geoip_cache_days: 7
# should we cache crt certificate pulls?
crt_cache_enabled: True
# how long to cache certificate information (if above is true)
crt_cache_days: 7
external_script_fetch: external_script_fetch:
# enable ability to pull external scripts
enabled: True enabled: True
# max mb of script to pull if pulling
max_total_mb: 5 max_total_mb: 5
# max time to wait for script to pull if pulling
max_time_ms: 3000 max_time_ms: 3000
# max redirects for external scripts pull if pulling
max_redirects: 3 max_redirects: 3
ui: ui:
# how many char to show in a snippet preview in the gui
snippet_preview_len: 300 snippet_preview_len: 300

80
app/docs/changelog.yaml Normal file
View File

@@ -0,0 +1,80 @@
# changelog.yaml
unreleased:
features: []
refactors: []
fixes: []
versions:
- version: "v0.2"
features:
- title: "UI Modernization"
details:
- "Migrated front-end to Tailwind CSS (compiled) with Flowbite JS components."
- "New navbar and layout system; better navigation and future expansion."
- "Docker-based CSS build for reproducible, lightweight builds."
- title: "Reusable CSS Components"
details:
- "Custom utilities: badge, badge-ok, badge-warn, badge-danger, chip, card, etc."
- "Reduces repetition and enforces consistent look."
- title: "Roadmap / Changelog (YAML-driven + in-app UI)"
details:
- "YAML-backed roadmap, in-app view at `/roadmap`."
- "Roadmap Filters: q, tag, min_priority, milestone; tag chips; Details modal that renders `details`."
- "YAML-backed Changelog, in-app view at `/changelog`."
- title: "Modal sizing & ergonomics"
details:
- "Wider modal at larger breakpoints; scrollable body for long content."
- title: "GeoIP Results Uplift"
details:
- "Cloudflare detection via GeoIP ASN; badge on results page."
- "Country/ASN notes shown beside collapsed IP next to GeoIP results."
- title: "Text Analysis Pipeline (Rules)"
details:
- "`analyse_text()` extracts visible text and evaluates `category: text` rules."
- "Captures matched phrases into deduped `content_snippet` (len capped via `settings.ui.snippet_preview_len`)."
- "Results exposed in JSON as `suspicious_text`; UI via `templates/partials/result_text.html`."
refactors:
- title: "Template Includes"
details:
- "Common UI (headers/footers/layout) extracted into Jinja includes."
- title: "Roadmap loader simplification"
details:
- "Removed cache; returns typed dataclasses and normalizes `details`."
- title: "Safer JSON in templates"
details:
- "Use `|tojson|forceescape` for embedding payloads in data attributes."
- title: "Rules Engine Regex handling"
details:
- "Honor per-rule regex flags; default IGNORECASE for `category: text` if no `i` flag."
- title: "Engine/Scanner logging"
details:
- "Dispatch-time visibility; gated by `settings.app.print_rule_dispatch`."
- title: "Code cleanup"
details:
- "Removed obsolete paths/utilities; removed duplicate `enrich_url` call."
fixes:
- title: "Table Rendering"
details:
- "Locked column widths; fixed snippet scaling to prevent reflow."
- title: "Rules Engine State"
details:
- "Fix pulling engine from app state; restores proper detections."
- title: "YAML parsing edge cases"
details:
- "Quote scalars containing `:`/`#`; use explicit `null` as needed."
- title: "/roadmap page stability"
details:
- "Return structured objects; fix `AttributeError: 'dict' object has no attribute 'roadmap'`."
- title: "Modal population"
details:
- "Pass `details` through route; DOM-ready + delegation populate reliably."
- title: "Text indicators not displayed"
details:
- "Add text analyzer; align result shape with `result_text` partial."
- version: "v0.1"
notes:
- "Initial Flask web UI for URL submission and analysis."
- "Domain & IP enrichment (WHOIS, GeoIP, ASN/ISP)."
- "First Suspicious Rules Engine for scripts/forms."
- "Basic Docker setup for sandboxed deployment."

View File

@@ -2,18 +2,6 @@
updated: "2025-08-22" updated: "2025-08-22"
roadmap: roadmap:
- id: "p1-analysis-cloudflare"
priority: 1
title: "Cloudflare Detection"
goal: "Detect Cloudflare usage and badge it, with explanation of dual-use (security vs. abuse)."
tags: ["analysis"]
milestone: null
details:
- "Detection signals: DNS (CNAME to Cloudflare, AS13335), HTTP headers (cf-ray, cf-cache-status), IP ranges, and challenge pages."
- "UI: add badge + tooltip with a short explainer about legitimate protection vs. abuse evasion."
- "Edge cases: 'grey-clouded' DNS entries, partial proxy (only some records), and CDN in front of non-HTTP services."
- "Acceptance: correctly identifies Cloudflare on known test hosts and avoids false positives on non-CF CDNs."
- id: "p1-analysis-total-score" - id: "p1-analysis-total-score"
priority: 1 priority: 1
title: "Total Score" title: "Total Score"
@@ -192,6 +180,15 @@ backlog:
- "Model: score IPs with decay over time; avoid permanent penalties for stale abuse." - "Model: score IPs with decay over time; avoid permanent penalties for stale abuse."
- "Integration: surface as context; do not overrule domain-level signals." - "Integration: surface as context; do not overrule domain-level signals."
- id: "cache-crt-results"
title: "cache the crt results as a setting"
goal: "Create setting that allows CRT results to be cached for domains"
tags: ["intel"]
milestone: null
details:
- "Setting: Need to be created"
- "Model: Store the result in a way that can be pulled for root domain, and fall back to subdomain if needed"
open_questions: open_questions:
- id: "design-imports-unification" - id: "design-imports-unification"
title: "Imports Unification" title: "Imports Unification"

View File

@@ -34,8 +34,23 @@ def build_rules_engine() -> RuleEngine:
def add(rule: Rule): def add(rule: Rule):
eng.add_rule(rule) eng.add_rule(rule)
add(Rule("form_action_missing", "Form has no action attribute", "form", "function", # Form no action
FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter, rule_name="form_action_missing"))) add(Rule(
name="form_action_missing",
description="Form has no action attribute",
category="form",
rule_type="function",
function=FunctionRuleAdapter(form_action_missing, category="form", adapter=adapter, rule_name="form_action_missing"),
))
add(Rule(
name="form_http_on_https_page",
description="Form submits via HTTP from HTTPS page",
category="form",
rule_type="function",
function=FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page"),
))
add(Rule("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", "function", add(Rule("form_http_on_https_page", "Form submits via HTTP from HTTPS page", "form", "function",
FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page"))) FunctionRuleAdapter(form_http_on_https_page, category="form", adapter=adapter, rule_name="form_http_on_https_page")))
add(Rule("form_submits_to_different_host", "Form submits to a different host", "form", "function", add(Rule("form_submits_to_different_host", "Form submits to a different host", "form", "function",

View File

@@ -143,6 +143,13 @@ class Rule:
return False, "No match" return False, "No match"
if self.rule_type == "function": if self.rule_type == "function":
if not callable(self.function):
logger.warning(
"[Rule] '%s' function is not callable (type=%s, value=%r)",
self.name, type(self.function).__name__, self.function
)
return False, "Invalid rule configuration: function not callable"
if callable(self.function): if callable(self.function):
try: try:
matched, reason = self.function(text) matched, reason = self.function(text)
@@ -255,7 +262,7 @@ class RuleEngine:
) )
return return
if settings.app.log_rule_loads: if settings.logconfig.log_rule_loads:
logger.info( logger.info(
"[engine] add_rule: %s/%s replace=%s -> count=%d", "[engine] add_rule: %s/%s replace=%s -> count=%d",
rule.category, rule.name, bool(replace), len(self._rules) rule.category, rule.name, bool(replace), len(self._rules)
@@ -308,7 +315,7 @@ class RuleEngine:
""" """
# --- dispatch visibility --- if set to true, we log applied categories # --- dispatch visibility --- if set to true, we log applied categories
if getattr(settings.app, "log_rule_dispatch", False): if getattr(settings.logconfig, "log_rule_dispatch", False):
all_cats = [r.category for r in self._rules] all_cats = [r.category for r in self._rules]
cat_counts = Counter(all_cats) cat_counts = Counter(all_cats)
# Which categories are being applied this run? # Which categories are being applied this run?

View File

@@ -20,7 +20,7 @@
<div class="max-w-7xl mx-auto px-4 py-3"> <div class="max-w-7xl mx-auto px-4 py-3">
<div class="flex items-center justify-between"> <div class="flex items-center justify-between">
<a href="{{ url_for('main.index') }}" class="text-xl font-bold text-white"> <a href="{{ url_for('main.index') }}" class="text-xl font-bold text-white">
SneakyScope {{ header }}
</a> </a>
{# Desktop nav #} {# Desktop nav #}
@@ -35,6 +35,11 @@
Roadmap Roadmap
</a> </a>
</li> </li>
<li>
<a href="{{ url_for('changelog.view_changelog') }}">
Changelog
</a>
</li>
</ul> </ul>
{# Mobile toggle #} {# Mobile toggle #}
@@ -62,6 +67,11 @@
Roadmap Roadmap
</a> </a>
</li> </li>
<li>
<a href="{{ url_for('changelog.view_changelog') }}">
Changelog
</a>
</li>
</ul> </ul>
</div> </div>
</div> </div>
@@ -76,7 +86,7 @@
{# Footer #} {# Footer #}
<footer class="bg-nav border-t border-gray-800 text-center p-4"> <footer class="bg-nav border-t border-gray-800 text-center p-4">
<p class="text-sm text-gray-400">© {{ current_year }} SneakyScope {{ app_name }} {{ app_version }} - A selfhosted URL sandbox</p> <p class="text-sm text-gray-400">{{ footer | safe }}</p>
</footer> </footer>
{# Flowbite JS (enables collapse) #} {# Flowbite JS (enables collapse) #}

View File

@@ -0,0 +1,136 @@
{# templates/changelog.html #}
{% extends "base.html" %}
{% block title %}Changelog{% endblock %}
{% block content %}
<div class="mx-auto max-w-6xl px-4 py-6">
<!-- Header -->
<div class="mb-6 flex flex-col gap-2 sm:flex-row sm:items-end sm:justify-between">
<div>
<h1 class="text-2xl font-semibold tracking-tight">SneakyScope Changelog</h1>
{% if updated %}
<p class="text-sm text-gray-400">Last updated: {{ updated }}</p>
{% endif %}
</div>
</div>
{# Unreleased #}
{% set ur = changelog.unreleased %}
{% if ur.features or ur.refactors or ur.fixes %}
<section class="mb-8 rounded-2xl border border-gray-700 bg-gray-900 p-5">
<div class="mb-3 flex items-center gap-3">
<h2 class="text-xl font-semibold">Unreleased</h2>
<span class="badge badge-warn">WIP</span>
</div>
<div class="grid gap-6 md:grid-cols-3">
{% for title, items, icon in [
("✨ Features", ur.features, "✨"),
("🛠️ Refactors", ur.refactors, "🛠️"),
("🐛 Fixes", ur.fixes, "🐛"),
] %}
<div class="rounded-xl border border-gray-800 bg-gray-950 p-4">
<h3 class="mb-2 text-sm font-semibold text-gray-200">{{ title }}</h3>
{% if items and items|length %}
<ul class="space-y-3">
{% for it in items %}
<li class="rounded-lg border border-gray-800 bg-gray-900 p-3">
<div class="mb-1 font-medium">{{ it.title }}</div>
{% if it.details %}
<ul class="ml-5 list-disc text-sm text-gray-300">
{% for d in it.details %}
<li>{{ d }}</li>
{% endfor %}
</ul>
{% endif %}
</li>
{% endfor %}
</ul>
{% else %}
<p class="text-sm text-gray-400">Nothing yet — add upcoming {{ title.split(' ')[1] | lower }} here.</p>
{% endif %}
</div>
{% endfor %}
</div>
</section>
{% endif %}
{# Versions Accordion #}
<section>
<div id="changelog-accordion" data-accordion="collapse" class="divide-y rounded-2xl border border-gray-700 bg-gray-900">
{% for v in changelog.versions %}
<h2 id="acc-head-{{ loop.index }}">
<button type="button"
class="flex w-full items-center justify-between px-5 py-4 text-left hover:bg-gray-800"
data-accordion-target="#acc-body-{{ loop.index }}"
aria-expanded="{{ 'true' if loop.first else 'false' }}"
aria-controls="acc-body-{{ loop.index }}">
<span class="flex items-center gap-3">
<span class="font-semibold">{{ v.version }}</span>
{% if v.notes and not (v.features or v.refactors or v.fixes) %}
<span class="badge badge-ok">Notes only</span>
{% endif %}
</span>
<svg class="h-5 w-5 text-gray-300" aria-hidden="true" fill="none" viewBox="0 0 10 6">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5 5 1 1 5"/>
</svg>
</button>
</h2>
<div id="acc-body-{{ loop.index }}"
class="{{ '' if loop.first else 'hidden' }}"
aria-labelledby="acc-head-{{ loop.index }}">
<div class="space-y-8 px-5 pb-5 pt-1">
{% if v.notes and v.notes|length %}
<div>
<h3 class="mb-2 text-sm font-semibold text-gray-200">Notes</h3>
<ul class="ml-6 list-disc text-sm text-gray-300">
{% for n in v.notes %}
<li>{{ n }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
{% for section_title, items in [
("✨ Features", v.features),
("🛠️ Refactors", v.refactors),
("🐛 Fixes", v.fixes),
] %}
{% if items and items|length %}
<div>
<h3 class="mb-2 text-sm font-semibold text-gray-200">{{ section_title }}</h3>
<div class="grid grid-cols-1 gap-3 md:grid-cols-2">
{% for it in items %}
<article class="rounded-2xl border border-gray-800 bg-gray-950 p-4">
<h4 class="mb-1 font-semibold leading-snug">{{ it.title }}</h4>
{% if it.details %}
<ul class="ml-5 list-disc text-sm text-gray-300">
{% for d in it.details %}
<li>{{ d }}</li>
{% endfor %}
</ul>
{% endif %}
</article>
{% endfor %}
</div>
</div>
{% endif %}
{% endfor %}
</div>
</div>
{% endfor %}
</div>
</section>
</div>
{% endblock %}
{% block scripts %}
{# If you're not auto-initializing Flowbite elsewhere, ensure its JS is loaded globally. #}
<script>
/* Optional: if you ever render details as HTML snippets, ensure they are trusted or sanitized server-side. */
/* No extra JS needed here if Flowbite handles [data-accordion]. */
</script>
{% endblock %}

View File

@@ -110,7 +110,7 @@
<div class="min-h-screen flex items-center justify-center p-4 text-center"> <div class="min-h-screen flex items-center justify-center p-4 text-center">
<div class="bg-card border border-gray-800 rounded-xl px-6 py-5 shadow"> <div class="bg-card border border-gray-800 rounded-xl px-6 py-5 shadow">
<div class="mx-auto mb-3 h-12 w-12 rounded-full border-4 border-white/30 border-t-white animate-spin"></div> <div class="mx-auto mb-3 h-12 w-12 rounded-full border-4 border-white/30 border-t-white animate-spin"></div>
<div class="text-base">Analyzing website…</div> <div class="text-base">Analyzing website…<br /> If you are pulling certificates, this may take a long time </div>
</div> </div>
</div> </div>
</div> </div>
@@ -142,11 +142,7 @@ function hideSpinner() {
} }
/** /**
* Initialize form submit handling: * Initialize form submit handling.
* - shows overlay spinner
* - disables submit button
* - shows small spinner inside button
* - lets the browser continue with POST
*/ */
(function initAnalyzeForm() { (function initAnalyzeForm() {
const form = document.getElementById('analyze-form'); const form = document.getElementById('analyze-form');
@@ -155,11 +151,16 @@ function hideSpinner() {
const submitBtn = form.querySelector('button[type="submit"]'); const submitBtn = form.querySelector('button[type="submit"]');
const btnSpinner = document.getElementById('btn-spinner'); const btnSpinner = document.getElementById('btn-spinner');
// Hide spinner overlay if arriving from bfcache/back // Only hide the overlay when returning via BFCache (back/forward)
window.addEventListener('pageshow', () => { window.addEventListener('pageshow', (e) => {
hideSpinner(); const nav = performance.getEntriesByType('navigation')[0];
if (submitBtn) submitBtn.disabled = false; const isBFCache = e.persisted || nav?.type === 'back_forward';
if (btnSpinner) btnSpinner.classList.add('hidden');
if (isBFCache) {
hideSpinner();
if (submitBtn) submitBtn.disabled = false;
if (btnSpinner) btnSpinner.classList.add('hidden');
}
}); });
form.addEventListener('submit', (e) => { form.addEventListener('submit', (e) => {

View File

@@ -33,7 +33,15 @@
<h3 class="text-base font-semibold mt-4 mb-2">GeoIP</h3> <h3 class="text-base font-semibold mt-4 mb-2">GeoIP</h3>
{% for ip, info in enrichment.geoip.items() %} {% for ip, info in enrichment.geoip.items() %}
<details class="border border-gray-800 rounded-lg mb-2"> <details class="border border-gray-800 rounded-lg mb-2">
<summary class="px-3 py-2 cursor-pointer hover:bg-gray-900/50">{{ ip }}</summary> <summary class="px-3 py-2 cursor-pointer hover:bg-gray-900/50">
{{ ip }} -
{% if info.country %} {{ info.country }} {% endif %} -
{% if info.isp %} {{ info.isp }} {% endif %}
{% if info.cloudflare %}
<span class="badge badge-warn">Cloudflare </span>
{% endif %}
</summary>
<div class="px-3 pb-3 overflow-x-auto"> <div class="px-3 pb-3 overflow-x-auto">
<table class="min-w-full text-sm"> <table class="min-w-full text-sm">
<tbody> <tbody>

View File

@@ -1,6 +1,6 @@
<!-- /templates/partials/result_forms.html --> <!-- /templates/partials/result_forms.html -->
<section id="forms" class="card"> <section id="forms" class="card">
<h2 class="text-lg font-semibold mb-3">Forms</h2> <h2 class="text-lg font-semibold mb-3">Suspicious Form Hits</h2>
{% if forms and forms|length > 0 %} {% if forms and forms|length > 0 %}
<div class="overflow-x-auto"> <div class="overflow-x-auto">

View File

@@ -1,6 +1,6 @@
<!-- /templates/partials/result_text.html --> <!-- /templates/partials/result_text.html -->
<section id="sus_text" class="card"> <section id="sus_text" class="card">
<h2 class="text-lg font-semibold mb-3">Text</h2> <h2 class="text-lg font-semibold mb-3">Suspicious Text</h2>
{% if suspicious_text and suspicious_text|length > 0 %} {% if suspicious_text and suspicious_text|length > 0 %}
<div class="overflow-x-auto"> <div class="overflow-x-auto">

View File

@@ -28,14 +28,23 @@
<p><span class="text-gray-400">Submitted URL:</span> <span class="break-all">{{ submitted_url }}</span></p> <p><span class="text-gray-400">Submitted URL:</span> <span class="break-all">{{ submitted_url }}</span></p>
<p> <p>
<span class="text-gray-400">Final URL:</span> <span class="text-gray-400">Final URL:</span>
<a href="{{ final_url }}" target="_blank" rel="noopener" class="break-all hover:text-blue-400">{{ final_url }}</a> <span class="break-all">{{ final_url }}</span>
</p> </p>
<p> <p>
<span class="text-gray-400">Permalink:</span> <span class="text-gray-400">Permalink:</span>
<a href="{{ url_for('main.view_result', run_uuid=uuid, _external=True) }}" class="break-all hover:text-blue-400"> <a href="{{ url_for('main.view_result', run_uuid=uuid, _external=True) }}" class="break-all hover:text-blue-400">
{{ request.host_url }}results/{{ uuid }} Permalink for {{ uuid }}
</a> </a>
</p> </p>
<p>
<span class="text-gray-400">Full Results File:</span>
<a href="{{ url_for('main.view_artifact', run_uuid=uuid, filename='results.json') }}"
target="_blank" rel="noopener"
class="break-all hover:text-blue-400">
Results File
</a>
</p>
<p><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p> <p><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
</div> </div>
</section> </section>

View File

@@ -30,6 +30,8 @@ days = 24 * 60
GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days
CRT_DEFAULT_TTL = settings.cache.crt_cache_days * days
logger = get_app_logger() logger = get_app_logger()
@@ -137,6 +139,20 @@ def search_certs(domain, wildcard=True, expired=True, deduplicate=True):
"not_before": "2018-02-08T15:47:39" "not_before": "2018-02-08T15:47:39"
} }
""" """
cache_key = f"crt_cert:{domain}"
# log if caching is turned on or not
logger.info(f"CRT Cache is set to: {settings.cache.crt_cache_enabled}")
if settings.cache.crt_cache_enabled:
cached = cache.read(cache_key)
if cached:
logger.info(f"[CACHE HIT] for CRT Cert: {domain}")
return cached
else:
logger.info(f"[CACHE MISS] for CRT Cert: {domain} - {cache_key}")
base_url = "https://crt.sh/?q={}&output=json" base_url = "https://crt.sh/?q={}&output=json"
if not expired: if not expired:
base_url = base_url + "&exclude=expired" base_url = base_url + "&exclude=expired"
@@ -153,11 +169,21 @@ def search_certs(domain, wildcard=True, expired=True, deduplicate=True):
try: try:
content = req.content.decode('utf-8') content = req.content.decode('utf-8')
data = json.loads(content) data = json.loads(content)
# if caching
if settings.cache.crt_cache_enabled:
logger.info(f"Setting Cache for {cache_key}")
cache.create(cache_key, data, CRT_DEFAULT_TTL)
return data return data
except ValueError: except ValueError:
# crt.sh fixed their JSON response. This shouldn't be necessary anymore # crt.sh fixed their JSON response. This shouldn't be necessary anymore
# https://github.com/crtsh/certwatch_db/commit/f4f46ea37c23543c4cdf1a3c8867d68967641807 # https://github.com/crtsh/certwatch_db/commit/f4f46ea37c23543c4cdf1a3c8867d68967641807
data = json.loads("[{}]".format(content.replace('}{', '},{'))) data = json.loads("[{}]".format(content.replace('}{', '},{')))
# if caching
if settings.cache.crt_cache_enabled:
logger.info(f"Setting Cache for {cache_key}")
cache.create(cache_key, data, CRT_DEFAULT_TTL)
return data return data
except Exception as err: except Exception as err:
logger.error("Error retrieving cert information from CRT.sh.") logger.error("Error retrieving cert information from CRT.sh.")
@@ -200,6 +226,7 @@ def gather_crtsh_certs_for_target(target):
hostname = parse_target_to_host(target) hostname = parse_target_to_host(target)
result["hostname"] = hostname result["hostname"] = hostname
# return fake return if no hostname was able to be parsed
if hostname is None: if hostname is None:
return result return result
@@ -209,6 +236,7 @@ def gather_crtsh_certs_for_target(target):
# Always query crt.sh for the specific hostname # Always query crt.sh for the specific hostname
# (expired=False means we filter expired) # (expired=False means we filter expired)
host_certs = search_certs(hostname, wildcard=False, expired=False) host_certs = search_certs(hostname, wildcard=False, expired=False)
result["crtsh"]["host_certs"] = host_certs result["crtsh"]["host_certs"] = host_certs
@@ -308,6 +336,9 @@ def enrich_whois(hostname: str) -> dict:
def enrich_geoip(hostname: str) -> dict: def enrich_geoip(hostname: str) -> dict:
"""Resolve hostname to IPs and fetch info from ip-api.com.""" """Resolve hostname to IPs and fetch info from ip-api.com."""
CLOUDFLARE_ASN = "AS13335 Cloudflare"
geo_info = {} geo_info = {}
ips = extract_ips_from_url(hostname) ips = extract_ips_from_url(hostname)
for ip in ips: for ip in ips:
@@ -324,6 +355,12 @@ def enrich_geoip(hostname: str) -> dict:
resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5) resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5)
if resp.status_code == 200: if resp.status_code == 200:
geo_info[ip_str] = resp.json() geo_info[ip_str] = resp.json()
asname = geo_info[ip_str].get("as")
# if behind cloudflare
if CLOUDFLARE_ASN in asname:
geo_info[ip_str].update({"cloudflare":True})
else: else:
geo_info[ip_str] = {"error": f"HTTP {resp.status_code}"} geo_info[ip_str] = {"error": f"HTTP {resp.status_code}"}
except Exception as e: except Exception as e:

View File

@@ -53,27 +53,34 @@ class UIConfig:
@dataclass @dataclass
class Cache_Config: class Cache_Config:
recent_runs_count: int = 10
whois_cache_days: int = 7 whois_cache_days: int = 7
geoip_cache_days: int = 7 geoip_cache_days: int = 7
recent_runs_count: int = 10
crt_cache_enabled: bool = True
crt_cache_days: int = 7
@dataclass @dataclass
class AppConfig: class Logging_Config:
name: str = "MyApp"
version_major: int = 1
version_minor: int = 0
log_rule_loads: bool = False log_rule_loads: bool = False
log_rule_dispatch: bool = False log_rule_dispatch: bool = False
log_rule_debug: bool = False log_rule_debug: bool = False
@dataclass
class BrandingConfig:
name: str = "MyApp"
@dataclass @dataclass
class Settings: class Settings:
cache: Cache_Config = field(default_factory=Cache_Config) cache: Cache_Config = field(default_factory=Cache_Config)
ui: UIConfig = field(default_factory=UIConfig) ui: UIConfig = field(default_factory=UIConfig)
external_fetch: External_FetchConfig = field(default_factory=External_FetchConfig) external_fetch: External_FetchConfig = field(default_factory=External_FetchConfig)
app: AppConfig = field(default_factory=AppConfig) branding: BrandingConfig = field(default_factory=BrandingConfig)
logconfig: Logging_Config = field(default_factory=Logging_Config)
@classmethod @classmethod
def from_yaml(cls, path: str | Path) -> "Settings": def from_yaml(cls, path: str | Path) -> "Settings":

View File

@@ -9,7 +9,7 @@ from . import create_app
# Gunicorn will look for "app" # Gunicorn will look for "app"
app = create_app() app = create_app()
from app.state import set_rules_engine, get_rules_engine from app.state import set_rules_engine
from app.logging_setup import get_app_logger from app.logging_setup import get_app_logger
from app.rules.factory import build_rules_engine from app.rules.factory import build_rules_engine

View File

@@ -17,7 +17,7 @@ This project follows [Semantic Versioning](https://semver.org/).
- _Nothing yet — add upcoming fixes here._ - _Nothing yet — add upcoming fixes here._
--- ---
## [v0.2]
### ✨ Features ### ✨ Features
@@ -37,6 +37,10 @@ This project follows [Semantic Versioning](https://semver.org/).
* **Modal sizing & ergonomics** * **Modal sizing & ergonomics**
Increased modal width at larger breakpoints and made the body scrollable so long details don't squish other content. Increased modal width at larger breakpoints and made the body scrollable so long details don't squish other content.
* **GeoIP Results Uplift**
Added Cloudflare detection via GeoIP ASN results and a Cloudflare badge on the results page.
Added Country - ASN notes beside collapsed IP next to GeoIP results for quick viewing.
* **Text Analysis Pipeline (Rules)** * **Text Analysis Pipeline (Rules)**
Implemented `analyse_text()` to extract visible page text and evaluate `category: text` rules. Implemented `analyse_text()` to extract visible page text and evaluate `category: text` rules.
Captures matched phrases into a deduped `content_snippet` (length capped via `settings.ui.snippet_preview_len`). Captures matched phrases into a deduped `content_snippet` (length capped via `settings.ui.snippet_preview_len`).
@@ -88,7 +92,7 @@ This project follows [Semantic Versioning](https://semver.org/).
--- ---
## [v0.1] Initial Work ## [v0.1]
- Implemented initial **Flask-based web UI** for URL submission and analysis. - Implemented initial **Flask-based web UI** for URL submission and analysis.
- Added **domain & IP enrichment** (WHOIS, GeoIP, ASN/ISP lookups). - Added **domain & IP enrichment** (WHOIS, GeoIP, ASN/ISP lookups).

View File

@@ -0,0 +1,24 @@
Blueprint -> Analyse calls browser.fetch_page_artifacts
Browser.py -> fetch_page_artifacts
grabs html_content
run analyse_forms on html_content
run analyse_scripts on html_content
enrich_url
build_rule_checks_overview
builds the structured dict for return
dumps data to disk
text flag mapping for rules
# --- Helper: map human-friendly flags to re.* constants ---
FLAG_MAP = {
"i": _re.IGNORECASE, "ignorecase": _re.IGNORECASE,
"m": _re.MULTILINE, "multiline": _re.MULTILINE,
"s": _re.DOTALL, "dotall": _re.DOTALL, "singleline": _re.DOTALL,
"x": _re.VERBOSE, "verbose": _re.VERBOSE,
"a": _re.ASCII, "ascii": _re.ASCII,
"u": _re.UNICODE, "unicode": _re.UNICODE,
"l": _re.LOCALE, "locale": _re.LOCALE,
}

View File

@@ -0,0 +1,113 @@
Got it — here's a structured outline you can keep as a “memory primer” for when you want to resume this work (whether continuing the current engine or pivoting to a SaaS build). Think of it as a **checkpoint map** of what we've covered and what directions you flagged.
---
# SneakyScope Rules Engine Refactor & SaaS Considerations (Outline)
## 1. Current State
* Engine: `app/rules/rules_engine.py` — mixed YAML + code rules, `(bool, str)` return shape expected.
* Issue: function-based rules returning `(bool, dict)` → caused invalid type warnings.
* Stopgap: `FunctionRuleAdapter` converted returns.
* Desire: Remove YAML entirely; rules defined in code only.
---
## 2. Agreed Direction
* **Rule definition approach**:
* Option B chosen → decorator-based registration.
* Every rule defined in `app/rules/` as Python functions.
* Rules register with metadata (`name`, `category`, `severity`, etc.).
* **Return shape**:
* Always return a **Result dict** (no adapter needed).
* Engine enforces schema and fills in defaults.
* **Engine relocation**:
* Move to `app/utils/rules_engine/`.
* Responsibilities: load, validate, freeze registry, run rules, aggregate results, log/report.
---
## 3. Result Schema (concept)
* **Per RuleResult**
* Required: `ok: bool`, `message: str`.
* Identity: `name`, `category`, `severity`, `tags`, `rule_version`.
* Detail: `data: object|null`.
* Timing: `duration_ms`.
* Errors: structured `error` object if exceptions occur.
* Provenance: `source_module`, optional `policy` snapshot.
* **Per AnalysisResult (run-level envelope)**
* Input scope: target URL, category, content hash, facts profile.
* Provenance: run\_id, engine\_version, ruleset\_checksum, timestamp, duration.
* Results: array of RuleResults.
* Summary: counts by severity, match count, errors, first match, top severity.
* Artifacts: references (screenshot, DOM snapshot, etc.).
* Policy snapshot: optional central policy/overrides.
---
## 4. Operational Standards
* **Determinism**: identical inputs + ruleset\_checksum → identical results.
* **Message stability**: avoid wording churn; expand via `data`.
* **Size limits**: `message ≤ 256 chars`; `data ≤ 8–16 KB`.
* **Errors**: `ok=false` if error present; always emit `message`.
* **Severity**: rule sets default; policy may override.
* **Tags**: controlled vocabulary; additive.
---
## 5. Migration Plan
1. Create new `rules_engine` package in `app/utils/`.
2. Add decorator/registry for rules.
3. Port all rules from YAML → Python modules grouped by category.
4. Delete YAML loader + adapters.
5. Update call sites to build `facts` and call `engine.run(...)`.
6. Add CI tests:
* Schema compliance.
* No duplicates.
* Ruleset checksum snapshot.
7. Integration tests with real fixtures.
8. Benchmark & harden (caps on input size, rule runtime).
---
## 6. SaaS Expansion (future)
* **Multi-tenancy**: separate org/user scopes for data and rule runs.
* **RBAC**: roles (admin, analyst, viewer).
* **Compliance**: logging, retention, export, audit trails.
* **Rules**: centrally maintained, not user-editable.
* **APIs**: authenticated endpoints, per-user quotas.
* **Observability**: per-tenant metrics, alerts.
* **Security**: sandboxing, strict module allowlists, compliance with SOC2/ISO.
* **Data controls**: PII redaction, encryption, retention policies.
---
## 7. Future-Proofing Hooks
* Versioning: ruleset checksum + per-rule versions.
* Extensibility: support `actions`, `links`, `evidence` in Result.
* Policy: central config for thresholds/overrides.
* Hot reload (optional, dev-only).
* Rule provenance tracking (source\_module, commit SHA).
---
✅ This outline is enough to “re-hydrate” the context later — you won't need to dig back into old logs to remember why `(bool, str)` didn't fit, why YAML was removed, or what schema we were converging on.
---
Do you want me to also save this in a **short “README-spec” style** (like `RESULTS.md`) so it can live in your repo as the contract doc for rules, or should I keep this as just your personal checkpoint outline?