first commit
This commit is contained in:
82
app/__init__.py
Normal file
82
app/__init__.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
app/__init__.py
|
||||
|
||||
Application factory and startup hooks for SneakyScope.
|
||||
|
||||
Responsibilities:
|
||||
- Create the Flask app.
|
||||
- Load settings (YAML -> dataclasses) with safe defaults.
|
||||
- Initialize and load the Suspicious Rules Engine from YAML.
|
||||
- Register blueprints (routes).
|
||||
- Configure core paths (e.g., SANDBOX_STORAGE).
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from flask import Flask
|
||||
|
||||
# Local imports
|
||||
from .utils.settings import get_settings
|
||||
from .utils import io_helpers # if you need logging/setup later
|
||||
from .utils import cache_db # available for future injections
|
||||
from .utils.rules_engine import RuleEngine, load_rules_from_yaml # rules engine
|
||||
from . import routes # blueprint
|
||||
|
||||
|
||||
def create_app() -> Flask:
    """
    Create and configure the Flask application instance.

    Steps, in order: build the Flask object, load settings, set the session
    secret, record the sandbox storage path, build the rules engine from
    ``config/suspicious_rules.yaml`` next to this module, publish the engine
    and the app name/version on ``app.config``, and register the routes
    blueprint.

    Returns:
        Flask: The configured Flask app.
    """
    # Function-scope import so this block does not rely on the file's import header.
    import secrets

    # Basic app object
    app = Flask(__name__, template_folder="templates", static_folder="static")

    # Load settings
    settings = get_settings()

    # Secret key loaded from env. The routes call flash(), which needs a signed
    # session, so a missing key must not leave secret_key as None.
    secret_key = os.getenv("SECRET_KEY")
    if not secret_key:
        # Random per-process fallback: sessions will not survive a restart and
        # are not shared between worker processes.
        secret_key = secrets.token_hex(32)
        app.logger.warning(
            "[!] SECRET_KEY is not set; using a random per-process key. "
            "Set SECRET_KEY for stable sessions."
        )
    app.secret_key = secret_key

    # Configure storage directory (bind-mount is still handled by sandbox.sh)
    sandbox_storage_default = Path("/data")
    app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)

    # Rules file path is resolved relative to this package.
    base_dir = Path(__file__).resolve().parent
    rules_path = base_dir / "config" / "suspicious_rules.yaml"

    # Create an engine instance even if the file is missing, so callers always
    # find an engine in app.config.
    engine = RuleEngine()

    if rules_path.exists():
        try:
            loaded_rules = load_rules_from_yaml(rules_path)
            for rule in loaded_rules:
                engine.add_rule(rule)
            app.logger.info(f"[+] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
        except Exception as e:
            # Best-effort: a broken rules file must not prevent startup.
            app.logger.warning(f"[!] Failed loading rules from {rules_path}: {e}")
    else:
        app.logger.warning(f"[!] Rules file not found at {rules_path}. Engine will start with zero rules.")

    # Store engine on app config so it is accessible via current_app
    app.config["RULE_ENGINE"] = engine

    # App name/version, available globally through app.config
    app.config["APP_NAME"] = settings.app.name
    app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"

    # Register blueprints
    app.register_blueprint(routes.bp)

    # Startup log lines so a clean boot is visible
    app.logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
    app.logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")

    return app
|
||||
400
app/browser.py
Normal file
400
app/browser.py
Normal file
@@ -0,0 +1,400 @@
|
||||
import re
|
||||
import uuid
|
||||
import json
|
||||
from pathlib import Path
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from typing import Dict, Any, Optional
|
||||
from playwright.async_api import async_playwright, TimeoutError as PWTimeoutError
|
||||
|
||||
from flask import current_app # access the rule engine from app config
|
||||
|
||||
from app.utils.io_helpers import safe_write
|
||||
from .enrichment import enrich_url
|
||||
|
||||
def get_rule_engine():
    """
    Look up the rules engine stored on the Flask application config.

    Returns:
        RuleEngine or None: The value of ``config["RULE_ENGINE"]``, or None
        when it is not set or the lookup raises for any reason.
    """
    try:
        # Reading current_app can raise when no Flask context is active;
        # treat any failure as "no engine".
        return current_app.config.get("RULE_ENGINE")
    except Exception:
        return None
|
||||
|
||||
|
||||
def run_rule_checks(text, category):
    """
    Evaluate every rule of one category against a piece of text.

    Args:
        text (str): The content to test (e.g., form snippet, inline JS).
        category (str): The rule category to run (e.g., 'form' or 'script').

    Returns:
        dict: {
            "checks": [ { "rule": str, "category": str, "matched": bool, "reason": Optional[str] }, ... ],
            "summary": { "matched_count": int, "total_rules": int }
        }
        With no engine configured the structure is returned empty. If the
        engine raises, an "engine_error" entry is appended and both summary
        counters are reset to zero.
    """
    checks = []
    summary = {"matched_count": 0, "total_rules": 0}
    result = {"checks": checks, "summary": summary}

    engine = get_rule_engine()
    if engine is None:
        return result

    try:
        seen = 0
        hits = 0
        # Each entry is expected to expose rule, category, matched and
        # (optionally) reason through .get().
        for entry in engine.run_all(text, category=category):
            seen += 1
            is_match = bool(entry.get("matched"))
            if is_match:
                hits += 1
            checks.append({
                "rule": entry.get("rule"),
                "category": entry.get("category"),
                "matched": is_match,
                "reason": entry.get("reason"),
            })

        summary["matched_count"] = hits
        summary["total_rules"] = seen
    except Exception as e:
        # Keep the structure well-formed and record the failure as a pseudo-check.
        checks.append({
            "rule": "engine_error",
            "category": category,
            "matched": False,
            "reason": f"Rule engine error: {e}",
        })
        summary["matched_count"] = 0
        summary["total_rules"] = 0

    return result
|
||||
|
||||
|
||||
def analyze_forms(html: str, base_url: str):
    """
    Parse forms from the page HTML and apply heuristic flags and rule-based checks.

    The form action is resolved against ``base_url`` before any comparison, so
    relative actions ("login.php", "/submit") count as same-host and
    protocol-relative actions ("//other.host/x") are compared by their real host.

    Args:
        html (str): The full page HTML.
        base_url (str): The final URL of the page (used for hostname comparisons).

    Returns:
        list[dict]: A list of form analysis dictionaries, each including:
            - action, method, inputs
            - flagged (bool), flag_reasons (list[str]), status (str)
            - rule_checks: dict with "checks" (list) and "summary" (dict)
    """
    # Function-scope import: the module header only imports urlparse.
    from urllib.parse import urljoin

    soup = BeautifulSoup(html, "lxml")
    forms_info = []
    page_parts = urlparse(base_url)
    page_hostname = page_parts.hostname

    for form in soup.find_all("form"):
        action = form.get("action")
        method = form.get("method", "get").lower()

        # Build explicit inputs list
        inputs = [
            {"name": inp.get("name"), "type": inp.get("type", "text")}
            for inp in form.find_all("input")
        ]

        flagged_reasons = []

        if not action or str(action).strip() == "":
            flagged_reasons.append("No action specified")
        else:
            try:
                target = urlparse(urljoin(base_url, str(action).strip()))
                target_host = target.hostname
                target_scheme = target.scheme
            except ValueError:
                # Unparseable action URL: skip the host/scheme heuristics.
                target_host = None
                target_scheme = ""

            # External host (only when the resolved action actually names one)
            if target_host and target_host != page_hostname:
                flagged_reasons.append("Submits to a different host")

            # HTTP form on HTTPS page
            if target_scheme == "http" and page_parts.scheme == "https":
                flagged_reasons.append("Submits over insecure HTTP")

        # Hidden inputs whose name mentions "password"
        for hidden in form.find_all("input", type="hidden"):
            name_value = hidden.get("name") or ""
            if "password" in name_value.lower():
                flagged_reasons.append("Hidden password field")

        flagged = bool(flagged_reasons)

        # Serialize a simple form snippet for the rules engine (category='form')
        snippet_lines = [f"action={action}", f"method={method}", "inputs="]
        snippet_lines.extend(
            f"  - name={item.get('name')} type={item.get('type')}" for item in inputs
        )
        form_snippet = "\n".join(snippet_lines)

        rule_checks = run_rule_checks(form_snippet, category="form")

        forms_info.append({
            "action": action,
            "method": method,
            "inputs": inputs,
            "flagged": flagged,
            "flag_reasons": flagged_reasons,
            "status": "flagged" if flagged else "possibly safe",
            "rule_checks": rule_checks,
        })

    return forms_info
|
||||
|
||||
|
||||
def analyze_scripts(html: str, base_url: str = "", engine=None) -> list[dict]:
    """
    Analyze <script> elements using the RuleEngine (if provided) and
    lightweight built-in heuristics. A record is appended only when at least
    one rule or heuristic matches, and every record carries a 'type'.

    Args:
        html: The full page HTML.
        base_url: URL of the page; its hostname is used for the third-party hint.
        engine: Optional object exposing ``evaluate_script(facts)`` or
            ``evaluate(facts)``; other objects are ignored.

    Returns list of dicts like:
      {
        "type": "external" | "inline" | "unknown",
        "src": "...",                      # for external
        "content_snippet": "...",          # for inline
        "rules": [ { "name": "...", "description": "..." }, ... ],
        "heuristics": [ "reason1", "reason2", ... ]
      }
    """
    soup = BeautifulSoup(html, "lxml")
    results: list[dict] = []

    # Benign MIME types we ignore entirely
    benign_types = {"application/ld+json", "application/json"}

    # Suspicious file extensions for external scripts
    dangerous_ext = (".vbs", ".hta")

    # Inline red flags
    risky_inline_patterns = [
        (re.compile(r"\beval\s*\(", re.IGNORECASE), "Uses eval()"),
        # Case-sensitive on purpose: "new function (...)" is an ordinary
        # anonymous-constructor idiom, not the Function constructor.
        (re.compile(r"\bnew\s+Function\s*\("), "Uses Function constructor"),
        (re.compile(r"\bdocument\.write\s*\(", re.IGNORECASE), "Uses document.write()"),
        (re.compile(r"\bActiveXObject\s*\(", re.IGNORECASE), "Uses ActiveXObject (IE-only)"),
        (re.compile(r"\batob\s*\(", re.IGNORECASE), "Uses atob() (possible obfuscation)"),
        (re.compile(r"\bunescape\s*\(", re.IGNORECASE), "Uses unescape() (legacy/obfuscation)"),
        (re.compile(r"\bset(?:Timeout|Interval)\s*\(\s*['\"`].+['\"`]\s*,", re.IGNORECASE),
         "String passed to setTimeout/setInterval"),
        (re.compile(r"[\"']?0x[0-9a-fA-F]{16,}[\"']?", re.IGNORECASE),
         "Contains long hex-like constants (possible obfuscation)"),
    ]

    base_host = urlparse(base_url).hostname or ""

    for script in soup.find_all("script"):
        try:
            src = (script.get("src") or "").strip()
            s_type_attr = (script.get("type") or "").strip().lower()

            # .string is often None; get_text() is reliable
            inline_text = script.get_text(strip=True) or ""

            # Skip benign structured data outright
            if s_type_attr in benign_types:
                continue

            # Parse src once; reused for the hostname fact and the extension check.
            src_parts = urlparse(src) if src else None

            # ---- Build facts for the rules engine
            facts = {
                "script_type_attr": s_type_attr or None,
                "has_src": bool(src),
                "src": src or None,
                "attrs": dict(script.attrs),
                "inline_len": len(inline_text),
                "inline_preview": inline_text[:200].replace("\n", " ") if inline_text else None,
                "base_url": base_url or None,
                "base_hostname": base_host or None,
                "src_hostname": src_parts.hostname if src_parts else None,
            }

            # ---- Evaluate rules engine (using name/description)
            engine_matches: list[dict] = []
            if engine is not None:
                try:
                    if hasattr(engine, "evaluate_script"):
                        matches = engine.evaluate_script(facts)
                    elif hasattr(engine, "evaluate"):
                        matches = engine.evaluate(facts)
                    else:
                        matches = []

                    if isinstance(matches, list):
                        for m in matches:
                            if isinstance(m, dict) and "name" in m:
                                engine_matches.append({
                                    "name": m["name"],
                                    "description": m.get("description", ""),
                                })
                            elif isinstance(m, str):
                                engine_matches.append({"name": m, "description": ""})
                except Exception as e:
                    # Surface engine failures as a pseudo-match instead of aborting.
                    engine_matches.append({"name": "Rules Engine Error", "description": str(e)})

            # ---- Built-in heuristics
            heuristics: list[str] = []
            if src:
                # Unusual URL schemes for script sources
                if src.lower().startswith(("data:", "blob:")):
                    heuristics.append("Script src uses data:/blob: URL")

                # Dangerous extensions: test the URL path only, so a query
                # string or fragment ("x.hta?v=1") cannot hide the extension.
                src_path = src_parts.path.lower()
                for ext in dangerous_ext:
                    if src_path.endswith(ext):
                        heuristics.append(f"External script with dangerous extension ({ext.lstrip('.')})")
                        break

                # Third-party host hint
                src_host = facts.get("src_hostname") or ""
                if base_host and src_host and src_host != base_host:
                    heuristics.append(f"Third-party host: {src_host}")
            elif inline_text:
                for pat, why in risky_inline_patterns:
                    if pat.search(inline_text):
                        heuristics.append(why)

            # ---- Only append when something matched; always set type
            if engine_matches or heuristics:
                record: dict = {}

                if src:
                    record["type"] = "external"
                    record["src"] = src
                elif inline_text:
                    record["type"] = "inline"
                    record["content_snippet"] = facts.get("inline_preview")
                else:
                    record["type"] = "unknown"

                if engine_matches:
                    record["rules"] = engine_matches
                if heuristics:
                    record["heuristics"] = heuristics

                results.append(record)

        except Exception as e:
            # Never let a single broken <script> kill the whole analysis
            results.append({
                "type": "unknown",
                "heuristics": [f"Script analysis error: {e}"],
            })

    return results
|
||||
|
||||
|
||||
async def fetch_page_artifacts(url: str, storage_dir: Path, engine=None) -> Dict[str, Any]:
    """
    Fetch page artifacts and save them in a UUID-based directory.

    Loads the URL in headless Chromium, writes ``screenshot.png`` and
    ``source.txt``, analyzes forms and scripts, runs enrichment, then writes
    and returns the combined result (also saved as ``results.json``).

    Args:
        url (str): URL to analyze.
        storage_dir (Path): Base /data path.
        engine: Optional rules engine instance (from app.config["RULE_ENGINE"]).

    Returns:
        Dict[str, Any]: uuid, submitted_url, final_url, redirects, downloads,
        scripts, forms, suspicious_scripts and enrichment.
    """
    run_uuid = str(uuid.uuid4())
    run_dir = storage_dir / run_uuid
    run_dir.mkdir(parents=True, exist_ok=True)

    screenshot_path = run_dir / "screenshot.png"
    source_path = run_dir / "source.txt"
    results_path = run_dir / "results.json"

    redirects = []
    downloads = []
    scripts = []

    script_suffixes = (".js", ".vbs", ".hta")

    def _on_response(resp):
        # Record only 3xx responses as redirects.
        if 300 <= resp.status <= 399:
            redirects.append({"status": resp.status, "url": resp.url})

    def _on_download(download):
        downloads.append({"url": download.url, "suggested_filename": download.suggested_filename})

    def _on_request(req):
        # Match on the URL path so "app.js?v=3" still counts as a script request.
        try:
            path = urlparse(req.url).path
        except ValueError:
            path = req.url
        if path.lower().endswith(script_suffixes):
            scripts.append(req.url)

    async with async_playwright() as pw:
        browser = await pw.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage", "--disable-blink-features=AutomationControlled"]
        )
        try:
            context = await browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
                java_script_enabled=True,
                locale="en-US"
            )
            page = await context.new_page()

            # Event handlers
            page.on("response", _on_response)
            page.on("download", _on_download)
            page.on("request", _on_request)

            try:
                await page.goto(url, wait_until="networkidle", timeout=60000)
                final_url = page.url
                await page.screenshot(path=str(screenshot_path), full_page=True)
                html = await page.content()
                safe_write(source_path, html)
            except PWTimeoutError:
                # Keep whatever URL was reached and store a placeholder source.
                final_url = page.url
                safe_write(source_path, "Page did not fully load (timeout)")
                await page.screenshot(path=str(screenshot_path), full_page=True)

            await context.close()
        finally:
            # Always release the browser, including when navigation raised a
            # non-timeout error that propagates to the caller.
            await browser.close()

    html_content = source_path.read_text(encoding="utf-8")
    forms_info = analyze_forms(html_content, final_url)
    suspicious_scripts = analyze_scripts(html_content, base_url=final_url, engine=engine)

    enrichment = enrich_url(url)

    result = {
        "uuid": run_uuid,
        "submitted_url": url,
        "final_url": final_url,
        "redirects": redirects,
        "downloads": downloads,
        "scripts": scripts,
        "forms": forms_info,
        "suspicious_scripts": suspicious_scripts,
        "enrichment": enrichment
    }

    safe_write(results_path, json.dumps(result, indent=2))
    return result
|
||||
5
app/config/bec_words.yaml
Normal file
5
app/config/bec_words.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
words:
|
||||
- "reset password"
|
||||
- "open document"
|
||||
- "view document"
|
||||
- "verify account"
|
||||
9
app/config/settings.yaml
Normal file
9
app/config/settings.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
app:
|
||||
name: SneakyScope
|
||||
version_major: 0
|
||||
version_minor: 1
|
||||
|
||||
cache:
|
||||
recent_runs_count: 10
|
||||
whois_cache_days: 7
|
||||
geoip_cache_days: 7
|
||||
80
app/config/suspicious_rules.yaml
Normal file
80
app/config/suspicious_rules.yaml
Normal file
@@ -0,0 +1,80 @@
|
||||
# app/config/suspicious_rules.yaml
|
||||
# Baseline suspicious rules for SneakyScope
|
||||
# Organized by category: script, form, text
|
||||
# Extend these with more specific rules as needed
|
||||
|
||||
# --- Script Rules ---
|
||||
- name: eval_usage
|
||||
description: "Use of eval() in script"
|
||||
category: script
|
||||
type: regex
|
||||
pattern: "\\beval\\("
|
||||
|
||||
- name: document_write
|
||||
description: "Use of document.write (often abused in malicious injections)"
|
||||
category: script
|
||||
type: regex
|
||||
pattern: "document\\.write\\("
|
||||
|
||||
- name: inline_event_handler
|
||||
description: "Inline event handler detected (onload, onclick, etc.)"
|
||||
category: script
|
||||
type: regex
|
||||
pattern: "on(load|click|error|mouseover|keydown)\\s*="
|
||||
|
||||
- name: obfuscated_encoding
|
||||
description: "Suspicious use of atob() or btoa() (base64 encoding/decoding)"
|
||||
category: script
|
||||
type: regex
|
||||
pattern: "\\b(atob|btoa)\\("
|
||||
|
||||
- name: suspicious_iframe
|
||||
description: "Iframe usage in script (possible phishing/malvertising)"
|
||||
category: script
|
||||
type: regex
|
||||
pattern: "<iframe[^>]*>"
|
||||
|
||||
# --- Form Rules ---
|
||||
- name: suspicious_form_action
|
||||
description: "Form action with external URL (potential credential exfiltration)"
|
||||
category: form
|
||||
type: regex
|
||||
pattern: "<form[^>]*action=['\"]http"
|
||||
|
||||
- name: hidden_inputs
|
||||
description: "Form with hidden inputs (possible credential harvesting)"
|
||||
category: form
|
||||
type: regex
|
||||
pattern: "<input[^>]*type=['\"]hidden"
|
||||
|
||||
- name: password_field
|
||||
description: "Form requesting password field"
|
||||
category: form
|
||||
type: regex
|
||||
pattern: "<input[^>]*type=['\"]password"
|
||||
|
||||
# --- Text Rules (Social Engineering / BEC) ---
|
||||
- name: urgent_request
|
||||
description: "Language suggesting urgency (common in phishing/BEC)"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: "(urgent|immediately|asap|action required)"
|
||||
|
||||
- name: account_suspension
|
||||
description: "Threat of account suspension/closure"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: "(account.*suspend|account.*close|verify.*account)"
|
||||
|
||||
- name: financial_request
|
||||
description: "Request for gift cards, wire transfer, or money"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: "(gift card|wire transfer|bank account|bitcoin|payment required)"
|
||||
|
||||
- name: credential_reset
|
||||
description: "Password reset or credential reset wording"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: "(reset password|update credentials|login to verify)"
|
||||
|
||||
137
app/enrichment.py
Normal file
137
app/enrichment.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
import requests
|
||||
import yaml
|
||||
import whois
|
||||
from datetime import datetime
|
||||
from ipaddress import ip_address
|
||||
import socket
|
||||
|
||||
# Local imports
|
||||
from .utils.cache_db import get_cache
|
||||
from .utils.settings import get_settings
|
||||
|
||||
# Configure logging
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

# Init cache
cache = get_cache("/data/cache.db")
settings = get_settings()

# Load BEC words.
# The config directory sits inside the app package, next to this module
# (app/config/bec_words.yaml), so it is resolved from this file's own directory.
BEC_WORDS_FILE = Path(__file__).parent / "config" / "bec_words.yaml"
if BEC_WORDS_FILE.exists():
    with open(BEC_WORDS_FILE, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; "words" may also be null.
        BEC_WORDS = (yaml.safe_load(f) or {}).get("words") or []
else:
    BEC_WORDS = []

# Minutes per day: 24 hours * 60 minutes
days = 24 * 60

# Cache lifetimes derived from the configured day counts.
# NOTE(review): presumably the cache expects TTLs in minutes - confirm against cache_db.
GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days
|
||||
|
||||
def enrich_url(url: str) -> dict:
    """
    Collect WHOIS, GeoIP and BEC-word findings for a URL.

    Returns a dict holding the keys produced by enrich_whois(), plus
    "geoip" (per-IP lookup results) and "bec_words" (configured phrases that
    occur, case-insensitively, in the URL string).
    """
    # Fall back to the raw input when no hostname can be parsed out of it.
    host = urlparse(url).hostname or url

    enriched = {}
    enriched.update(enrich_whois(host))
    enriched["geoip"] = enrich_geoip(host)

    lowered_url = url.lower()
    enriched["bec_words"] = [word for word in BEC_WORDS if word.lower() in lowered_url]
    return enriched
|
||||
|
||||
|
||||
def enrich_whois(hostname: str) -> dict:
    """
    Fetch WHOIS info using python-whois with safe type handling.

    Results are served from the cache when present. On a python-whois failure
    the raw output of the ``whois`` command is used as a fallback. Only
    lookups that produced data are cached, so a transient failure is retried
    on the next request.

    Args:
        hostname: Host to look up.

    Returns:
        dict: ``{"whois": {...}}`` on success; ``{"whois": {}, "raw_whois": str}``
        when the fallback ran (``"N/A"`` if that failed too).
    """
    cache_key = f"whois:{hostname}"
    cached = cache.read(cache_key)
    if cached:
        logging.info(f"[CACHE HIT] for WHOIS: {hostname}")
        return cached

    logging.info(f"[CACHE MISS] for WHOIS: {hostname}")

    def format_dt(val):
        # Normalizes any WHOIS field (date, list of dates, plain value) to text.
        if isinstance(val, list):
            return ", ".join([v.strftime("%Y-%m-%d %H:%M:%S") if isinstance(v, datetime) else str(v) for v in val])
        elif isinstance(val, datetime):
            return val.strftime("%Y-%m-%d %H:%M:%S")
        elif val is None:
            return "Possible Privacy"
        else:
            return str(val)

    result = {}
    lookup_ok = True
    try:
        w = whois.whois(hostname)
        result["whois"] = {
            "registrar": format_dt(getattr(w, "registrar", None)),
            "creation_date": format_dt(getattr(w, "creation_date", None)),
            "expiration_date": format_dt(getattr(w, "expiration_date", None)),
            "owner": format_dt(getattr(w, "org", None))
        }

    except Exception as e:
        logging.warning(f"WHOIS lookup failed for {hostname}: {e}")
        try:
            # fallback raw whois text
            import subprocess

            # The hostname comes from a user-submitted URL; never let it be
            # parsed as a command-line option.
            if str(hostname).startswith("-"):
                raise ValueError("hostname must not start with '-'")

            # Bounded wait so a stalled WHOIS server cannot hang the request.
            raw_output = subprocess.check_output(
                ["whois", hostname], encoding="utf-8", errors="ignore", timeout=30
            )
            result["whois"] = {}
            result["raw_whois"] = raw_output
        except Exception as raw_e:
            logging.error(f"Raw WHOIS also failed: {raw_e}")
            result["whois"] = {}
            result["raw_whois"] = "N/A"
            lookup_ok = False

    # Do not pin a total failure in the cache for the whole TTL.
    if lookup_ok:
        cache.create(cache_key, result, WHOIS_DEFAULT_TTL)
    return result
|
||||
|
||||
|
||||
def enrich_geoip(hostname: str) -> dict:
    """
    Resolve hostname to IPs and fetch info from ip-api.com.

    Each IP is looked up in the cache first. Only successful responses
    (HTTP 200 with a JSON body) are cached; error entries are returned to the
    caller but not stored, so a transient failure is retried next time.

    Args:
        hostname: Host to resolve.

    Returns:
        dict: Maps each resolved IP (as a string) to the ip-api JSON payload
        or to ``{"error": "..."}``.
    """
    geo_info = {}
    ips = extract_ips_from_url(hostname)
    for ip in ips:
        ip_str = str(ip)
        cache_key = f"geoip:{ip_str}"
        cached = cache.read(cache_key)
        if cached:
            logging.info(f"[CACHE HIT] for GEOIP: {ip}")
            geo_info[ip_str] = cached
            continue

        logging.info(f"[CACHE MISS] for GEOIP: {ip}")
        fetched_ok = False
        try:
            resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5)
            if resp.status_code == 200:
                geo_info[ip_str] = resp.json()
                fetched_ok = True
            else:
                geo_info[ip_str] = {"error": f"HTTP {resp.status_code}"}
        except Exception as e:
            # Best-effort enrichment: report the failure for this IP and move on.
            geo_info[ip_str] = {"error": str(e)}

        if fetched_ok:
            cache.create(cache_key, geo_info[ip_str], GEOIP_DEFAULT_TTL)

    return geo_info
|
||||
|
||||
|
||||
def extract_ips_from_url(hostname: str):
    """
    Resolve hostname to IPs.

    Args:
        hostname: Host name or IP literal to resolve.

    Returns:
        list: Unique ``ipaddress`` objects, IPv4 before IPv6 and ascending
        within each family, so repeated runs produce the same order. Empty
        when resolution fails or the name is invalid.
    """
    try:
        info = socket.getaddrinfo(hostname, None)
        # Element 4 of each entry is the sockaddr tuple; its first item is the address.
        unique = {ip_address(entry[4][0]) for entry in info}
    except (OSError, ValueError):
        # OSError covers resolver errors (socket.gaierror); ValueError covers
        # unencodable host names and unparseable addresses.
        return []
    return sorted(unique, key=lambda ip: (ip.version, int(ip)))
|
||||
125
app/routes.py
Normal file
125
app/routes.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
|
||||
|
||||
from .browser import fetch_page_artifacts
|
||||
from .enrichment import enrich_url
|
||||
from .utils.settings import get_settings
|
||||
from .utils.io_helpers import get_recent_results
|
||||
|
||||
# Settings are loaded once at import time and shared by every route below.
settings = get_settings()

# Blueprint that carries all routes defined in this module.
bp = Blueprint("main", __name__)

# Display values injected into templates by the context processor.
app_name = settings.app.name
app_version = "v {}.{}".format(settings.app.version_major, settings.app.version_minor)
|
||||
|
||||
# --- context processor ---
|
||||
@bp.context_processor
def inject_app_info():
    """Expose the module-level app name and version string to templates."""
    return dict(app_name=app_name, app_version=app_version)
|
||||
|
||||
@bp.route("/", methods=["GET"])
def index():
    """
    Render the landing page together with the list of recent runs.

    The list length comes from settings.cache.recent_runs_count: negative
    values are clamped to 0, and 10 is used when the setting is missing or
    cannot be converted to an int.
    """
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    try:
        recent_count = max(0, int(getattr(settings.cache, "recent_runs_count", 10)))
    except Exception:
        recent_count = 10

    # The logger is handed to the helper, which builds the list from storage.
    recent_results = get_recent_results(storage, recent_count, current_app.logger)

    return render_template("index.html", recent_results=recent_results)
|
||||
|
||||
|
||||
@bp.route("/analyze", methods=["POST"])
def analyze():
    """
    Run an analysis for the submitted URL and redirect to its result page.

    The URL comes from the ``url`` form field. Only http/https URLs with a
    host are accepted, because the value is handed to a headless browser and
    other schemes (e.g. ``file:``) would expose local resources. On any
    validation or analysis failure a flash message is set and the user is
    sent back to the index page.
    """
    # Function-scope import: this module's header does not import urlparse.
    from urllib.parse import urlparse

    url = request.form.get("url", "").strip()
    if not url:
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))

    # Reject anything that is not a plain web URL before it reaches the browser.
    try:
        parsed = urlparse(url)
        is_web_url = parsed.scheme in ("http", "https") and bool(parsed.hostname)
    except ValueError:
        is_web_url = False
    if not is_web_url:
        flash("Please enter a valid http:// or https:// URL.", "error")
        return redirect(url_for("main.index"))

    current_app.logger.info(f"[*] Analyzing {url}")

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    storage.mkdir(parents=True, exist_ok=True)

    try:
        engine = current_app.config.get("RULE_ENGINE")
        result = asyncio.run(fetch_page_artifacts(url, storage, engine=engine))
        current_app.logger.info(f"[+] Analysis done for {url}")
    except Exception as e:
        flash(f"Analysis failed: {e}", "error")
        current_app.logger.error(f"Analysis failed for {url}: {e}")
        return redirect(url_for("main.index"))

    # Add enrichment safely.
    # NOTE(review): fetch_page_artifacts already calls enrich_url and writes
    # results.json, so this pass only changes the in-memory dict - confirm
    # whether it is still needed.
    try:
        enrichment = enrich_url(url)
        result["enrichment"] = enrichment
        current_app.logger.info(f"[+] Enrichment added for {url}")
    except Exception as e:
        result["enrichment"] = {}
        current_app.logger.warning(f"[!] Enrichment failed for {url}: {e}")

    # Redirect to permalink page for this run
    return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
|
||||
|
||||
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
    """
    Render the stored result page for one analysis run.

    Args:
        run_uuid: Directory name of the run under SANDBOX_STORAGE.

    Responds with 404 when the identifier does not name a direct child of the
    storage directory or when no results.json exists for it.
    """
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    run_dir = (storage / run_uuid).resolve()

    # "." or ".." (or a symlink) must not move the lookup out of its own run directory.
    if run_dir.parent != storage:
        current_app.logger.warning(f"Rejected run identifier: {run_uuid}")
        abort(404)

    results_path = run_dir / "results.json"

    if not results_path.is_file():
        current_app.logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

    with open(results_path, "r", encoding="utf-8") as f:
        result = json.load(f)

    # Pass the UUID to the template for artifact links
    result["uuid"] = run_uuid

    return render_template("result.html", **result)
|
||||
|
||||
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
    """
    Serve one artifact file (e.g. screenshot or source) of an analysis run.

    Args:
        run_uuid: Directory name of the run under SANDBOX_STORAGE.
        filename: Name of a file directly inside that run directory.

    Responds with 404 when the resolved path is not a regular file located
    directly inside a run directory that is itself a direct child of storage.
    """
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Resolve before comparing: a lexical check on the unresolved path lets
    # "." / ".." segments and symlinks through.
    run_dir = (storage / run_uuid).resolve()
    full_path = (run_dir / filename).resolve()

    # Prevent directory traversal
    if run_dir.parent != storage or full_path.parent != run_dir:
        current_app.logger.warning(f"Directory traversal attempt: {filename}")
        abort(404)

    if not full_path.is_file():
        current_app.logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)
|
||||
|
||||
|
||||
288
app/static/style.css
Normal file
288
app/static/style.css
Normal file
@@ -0,0 +1,288 @@
|
||||
:root {
|
||||
font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
background: #0b0f14;
|
||||
color: #e6edf3;
|
||||
}
|
||||
|
||||
header, footer {
|
||||
padding: 1rem 1.25rem;
|
||||
background: #0f1720;
|
||||
border-bottom: 1px solid #1f2a36;
|
||||
}
|
||||
|
||||
/* ===== main: now full-width (no 960px cap) ===== */
|
||||
main {
|
||||
padding: 1.5rem 2rem; /* a bit more horizontal breathing room */
|
||||
max-width: 100%; /* remove fixed cap */
|
||||
width: 100%;
|
||||
margin: 0; /* no auto centering since we’re full-width */
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.card {
|
||||
background: #111826;
|
||||
padding: 1rem;
|
||||
border: 1px solid #1f2a36;
|
||||
border-radius: 12px;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
input[type=url] {
|
||||
width: 100%;
|
||||
padding: 0.7rem;
|
||||
border-radius: 8px;
|
||||
border: 1px solid #243041;
|
||||
background: #0b1220;
|
||||
color: #e6edf3;
|
||||
}
|
||||
|
||||
button, .button {
|
||||
display: inline-block;
|
||||
margin-top: 0.75rem;
|
||||
padding: 0.6rem 1rem;
|
||||
border-radius: 8px;
|
||||
border: 1px solid #243041;
|
||||
background: #1a2535;
|
||||
color: #e6edf3;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.flash {
|
||||
list-style: none;
|
||||
padding: 0.5rem 1rem;
|
||||
}
|
||||
|
||||
.flash .error {
|
||||
color: #ff6b6b;
|
||||
}
|
||||
|
||||
.grid {
|
||||
display: grid;
|
||||
grid-template-columns: 150px 1fr;
|
||||
gap: 0.5rem 1rem;
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
border-radius: 8px;
|
||||
border: 1px solid #243041;
|
||||
}
|
||||
|
||||
pre.code {
|
||||
white-space: pre-wrap;
|
||||
word-break: break-all;
|
||||
background: #0b1220;
|
||||
padding: 0.75rem;
|
||||
border-radius: 8px;
|
||||
border: 1px solid #243041;
|
||||
}
|
||||
|
||||
/* Links */
|
||||
a {
|
||||
color: #7dd3fc; /* Soft cyan for dark background */
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #38bdf8; /* Slightly brighter on hover */
|
||||
}
|
||||
|
||||
/* Accordion / details summary */
|
||||
details summary {
|
||||
cursor: pointer;
|
||||
padding: 0.5rem;
|
||||
font-weight: bold;
|
||||
border-radius: 8px;
|
||||
background: #111826;
|
||||
border: 1px solid #1f2a36;
|
||||
margin-bottom: 0.5rem;
|
||||
transition: background 0.3s ease;
|
||||
}
|
||||
|
||||
details[open] summary {
|
||||
background: #1a2535; /* Slightly lighter when expanded */
|
||||
}
|
||||
|
||||
details > ul, details > table {
|
||||
padding-left: 1rem;
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
|
||||
/* Highlight flagged forms */
|
||||
details.flagged summary {
|
||||
border-left: 4px solid #ff6b6b; /* Red accent for flagged forms */
|
||||
}
|
||||
|
||||
/* Smooth collapse/expand */
|
||||
details ul, details p {
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
/* Enrichment / GeoIP / Forms / Redirects Tables */
|
||||
.enrichment-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.enrichment-table th,
|
||||
.enrichment-table td {
|
||||
border: 1px solid #243041;
|
||||
padding: 0.5rem;
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.enrichment-table th {
|
||||
background: #111826;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.enrichment-table td {
|
||||
width: auto; /* browser resizes naturally */
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
/* Scripts Table Special Handling */
|
||||
.scripts-table pre.code {
|
||||
margin: 0;
|
||||
padding: 0.25rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
/* Hover effects for table rows */
|
||||
.enrichment-table tbody tr:hover {
|
||||
background: #1f2a36;
|
||||
}
|
||||
|
||||
/* Card table headings */
|
||||
.enrichment-table thead th {
|
||||
border-bottom: 2px solid #243041;
|
||||
}
|
||||
|
||||
/* Ensure nested tables don't overflow */
|
||||
.card table {
|
||||
table-layout: auto;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
/* ============================
|
||||
Results Table (3+ columns)
|
||||
- Visual style matches .enrichment-table
|
||||
- Adds better wrapping for long strings (URL/UUID)
|
||||
- Right-aligns timestamps for scannability
|
||||
============================ */
|
||||
|
||||
.results-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
background: #111826; /* match card background */
|
||||
border: 1px solid #1f2a36; /* subtle border like cards */
|
||||
border-radius: 12px; /* rounded corners */
|
||||
overflow: hidden; /* clip the rounded corners */
|
||||
table-layout: auto; /* allow natural column sizing */
|
||||
}
|
||||
|
||||
/* Header styling */
|
||||
.results-table thead th {
|
||||
padding: 0.6rem 0.75rem;
|
||||
background: #0f1720; /* match header tone */
|
||||
border-bottom: 1px solid #1f2a36;
|
||||
text-align: left;
|
||||
font-weight: 600;
|
||||
white-space: nowrap; /* keep short headers on one line */
|
||||
}
|
||||
|
||||
/* Body cells */
|
||||
.results-table tbody td {
|
||||
padding: 0.6rem 0.75rem;
|
||||
border-top: 1px solid #1f2a36;
|
||||
vertical-align: top;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
/* Zebra rows for readability (optional) */
|
||||
.results-table tbody tr:nth-child(odd) {
|
||||
background: #0d1522; /* slight contrast row */
|
||||
}
|
||||
|
||||
/* Links inside table should inherit your global link colors */
|
||||
.results-table a {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* ---- Column-specific tweaks ---- */
|
||||
|
||||
/* URL column: allow wrapping of long URLs without blowing the layout */
|
||||
.results-table td.url,
|
||||
.results-table td.url a {
|
||||
word-wrap: break-word; /* legacy support */
|
||||
overflow-wrap: anywhere; /* modern wrapping for long URLs */
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
/* UUID column: force wrap to avoid overflow */
|
||||
.results-table td.uuid {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
word-break: break-all; /* split at any point to keep table narrow */
|
||||
max-width: 28ch; /* reasonable width to avoid stretching */
|
||||
}
|
||||
|
||||
/* Timestamp column: align right and keep on a single line */
|
||||
.results-table td.timestamp {
|
||||
text-align: right;
|
||||
white-space: nowrap; /* keep ISO timestamps on one line */
|
||||
}
|
||||
|
||||
/* Optional: make the newest (first) row stand out subtly */
|
||||
.results-table tbody tr:first-child {
|
||||
box-shadow: inset 0 0 0 1px #243041;
|
||||
}
|
||||
|
||||
/* Optional: small, subtle buttons in table cells (e.g., copy UUID) */
|
||||
.results-table .copy-btn {
|
||||
margin-left: 0.4rem;
|
||||
padding: 0.2rem 0.45rem;
|
||||
border-radius: 6px;
|
||||
border: 1px solid #243041;
|
||||
background: #1a2535;
|
||||
color: #e6edf3;
|
||||
cursor: pointer;
|
||||
line-height: 1;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.results-table .copy-btn:hover {
|
||||
filter: brightness(1.1);
|
||||
}
|
||||
|
||||
/* ===== Responsive niceties for very small screens ===== */
|
||||
@media (max-width: 768px) {
|
||||
main {
|
||||
padding: 1rem; /* a tad tighter on mobile */
|
||||
}
|
||||
|
||||
.enrichment-table,
|
||||
.results-table {
|
||||
display: block;
|
||||
overflow-x: auto; /* allow horizontal scroll if needed */
|
||||
white-space: nowrap;
|
||||
}
|
||||
}
|
||||
|
||||
.scripts-table td ul {
|
||||
margin: 0.25rem 0 0.25rem 1rem;
|
||||
padding-left: 1rem;
|
||||
}
|
||||
.scripts-table td small {
|
||||
opacity: 0.85;
|
||||
}
|
||||
33
app/templates/base.html
Normal file
33
app/templates/base.html
Normal file
@@ -0,0 +1,33 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>{{ app_name }} {{ app_version }}</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/sanitize.css" />
|
||||
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>{{ app_name }} {{ app_version }}</h1>
|
||||
</header>
|
||||
|
||||
{% with messages = get_flashed_messages(with_categories=true) %}
|
||||
{% if messages %}
|
||||
<ul class="flash">
|
||||
{% for category, message in messages %}
|
||||
<li class="{{ category }}">{{ message }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
{% endwith %}
|
||||
|
||||
<main>
|
||||
{% block content %}{% endblock %}
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
<small>{{ app_name }} - A self-hosted URL analysis sandbox - {{ app_version }}</small>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
149
app/templates/index.html
Normal file
149
app/templates/index.html
Normal file
@@ -0,0 +1,149 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
|
||||
<!-- Analysis Form -->
|
||||
<form id="analyze-form" method="post" action="{{ url_for('main.analyze') }}" class="card">
|
||||
<h2>Analyze a URL</h2>
|
||||
<label for="url">Enter a URL to analyze</label>
|
||||
<input id="url" name="url" type="url" placeholder="https://example.com" required />
|
||||
<button type="submit">Analyze</button>
|
||||
</form>
|
||||
|
||||
<!-- Recent Results (optional; shown only if recent_results provided) -->
|
||||
{% if recent_results %}
|
||||
<div class="card" id="recent-results">
|
||||
<h2>Recent Results</h2>
|
||||
<table class="results-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Timestamp</th>
|
||||
<th>URL</th>
|
||||
<th>UUID</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for r in recent_results %}
|
||||
<tr>
|
||||
<td class="timestamp">
|
||||
{% if r.timestamp %}
|
||||
{{ r.timestamp }}
|
||||
{% else %}
|
||||
N/A
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="url">
|
||||
<a href="{{ url_for('main.view_result', run_uuid=r.uuid) }}">
|
||||
{{ r.final_url or r.submitted_url }}
|
||||
</a>
|
||||
</td>
|
||||
<td class="uuid">
|
||||
<code id="uuid-{{ loop.index }}">{{ r.uuid }}</code>
|
||||
<button
|
||||
type="button"
|
||||
class="copy-btn"
|
||||
data-target="uuid-{{ loop.index }}">
|
||||
📋
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Spinner Modal -->
|
||||
<div id="spinner-modal" style="
|
||||
display:none;
|
||||
opacity:0;
|
||||
position:fixed;
|
||||
top:0;
|
||||
left:0;
|
||||
width:100%;
|
||||
height:100%;
|
||||
background:rgba(0,0,0,0.7);
|
||||
color:#fff;
|
||||
font-size:1.5rem;
|
||||
text-align:center;
|
||||
padding-top:20%;
|
||||
z-index:9999;
|
||||
transition: opacity 0.3s ease;
|
||||
">
|
||||
<div>
|
||||
<div class="loader" style="
|
||||
border: 8px solid #f3f3f3;
|
||||
border-top: 8px solid #1a2535;
|
||||
border-radius: 50%;
|
||||
width: 60px;
|
||||
height: 60px;
|
||||
animation: spin 1s linear infinite;
|
||||
margin: 0 auto 1rem auto;
|
||||
"></div>
|
||||
Analyzing website…
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
|
||||
// Spinner modal shown while the (slow) analysis request is in flight.
const form = document.getElementById('analyze-form');
const modal = document.getElementById('spinner-modal');
const submitButton = form.querySelector('button');

// Make the modal visible, then fade it in on the next frame so the
// opacity transition has a starting state to animate from.
function showModal() {
  modal.style.display = 'block';
  requestAnimationFrame(() => {
    modal.style.opacity = '1';
  });
}

// Fade the modal out and remove it from layout once the transition ends.
function hideModal() {
  modal.style.opacity = '0';
  modal.addEventListener('transitionend', () => {
    modal.style.display = 'none';
  }, { once: true });
}

// Reset UI state on initial load / back navigation. A page restored from the
// browser's back/forward cache keeps the state it was left in, so the submit
// button must be re-enabled here or the form stays unusable.
window.addEventListener('pageshow', () => {
  modal.style.opacity = '0';
  modal.style.display = 'none';
  submitButton.disabled = false;
});

form.addEventListener('submit', (e) => {
  // Take over submission so the modal can be painted first
  e.preventDefault();
  showModal();

  // Prevent double submission
  submitButton.disabled = true;

  // Allow browser to render the modal before submitting
  requestAnimationFrame(() => form.submit());
});
|
||||
</script>
|
||||
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', () => {
  // Legacy copy path: navigator.clipboard only exists in secure contexts
  // (HTTPS or localhost), which a self-hosted plain-HTTP deployment is not.
  function legacyCopy(text) {
    const scratch = document.createElement('textarea');
    scratch.value = text;
    scratch.setAttribute('readonly', '');
    scratch.style.position = 'fixed';
    scratch.style.opacity = '0';
    document.body.appendChild(scratch);
    scratch.select();

    let copied = false;
    try {
      copied = document.execCommand('copy');
    } catch (err) {
      copied = false;
    } finally {
      document.body.removeChild(scratch);
    }
    return copied
      ? Promise.resolve()
      : Promise.reject(new Error('Copy command was rejected'));
  }

  // Always returns a promise so callers have a single success/failure path.
  function copyText(text) {
    if (navigator.clipboard && window.isSecureContext) {
      return navigator.clipboard.writeText(text);
    }
    return legacyCopy(text);
  }

  const buttons = document.querySelectorAll('.copy-btn');
  buttons.forEach(btn => {
    btn.addEventListener('click', () => {
      const targetId = btn.getAttribute('data-target');
      const target = document.getElementById(targetId);
      if (!target) {
        console.error('Failed to copy UUID:', new Error('Missing element ' + targetId));
        return;
      }
      const uuidText = target.innerText;

      copyText(uuidText).then(() => {
        // Give quick feedback
        btn.textContent = '✅';
        setTimeout(() => { btn.textContent = '📋'; }, 1500);
      }).catch(err => {
        console.error('Failed to copy UUID:', err);
      });
    });
  });
});
|
||||
</script>
|
||||
|
||||
{% endblock %}
|
||||
268
app/templates/result.html
Normal file
268
app/templates/result.html
Normal file
@@ -0,0 +1,268 @@
|
||||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
|
||||
<!-- Top Jump List -->
|
||||
<div class="card" id="top-jump-list">
|
||||
<h2>Jump to Section</h2>
|
||||
<ul>
|
||||
<li><a href="/">Analyse Another Page</a></li>
|
||||
<li><a href="#url-overview">URL Overview</a></li>
|
||||
<li><a href="#enrichment">Enrichment</a></li>
|
||||
<li><a href="#redirects">Redirects</a></li>
|
||||
<li><a href="#forms">Forms</a></li>
|
||||
<li><a href="#scripts">Suspicious Scripts</a></li>
|
||||
<li><a href="#screenshot">Screenshot</a></li>
|
||||
<li><a href="#source">Source</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- URL Overview -->
|
||||
<div class="card" id="url-overview">
|
||||
<h2>URL Overview</h2>
|
||||
<p><strong>Submitted URL:</strong> {{ submitted_url }}</p>
|
||||
<p><strong>Final URL:</strong> <a href="{{ final_url }}" target="_blank">{{ final_url }}</a></p>
|
||||
<p><strong>Permalink:</strong>
|
||||
<a href="{{ url_for('main.view_result', run_uuid=uuid, _external=True) }}">
|
||||
{{ request.host_url }}results/{{ uuid }}
|
||||
</a>
|
||||
</p>
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
<!-- Enrichment -->
|
||||
<div class="card" id="enrichment">
|
||||
<h2>Enrichment</h2>
|
||||
|
||||
<!-- WHOIS -->
|
||||
{% if enrichment.whois %}
|
||||
<h3>WHOIS</h3>
|
||||
<table class="enrichment-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Field</th>
|
||||
<th>Value</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for k, v in enrichment.whois.items() %}
|
||||
<tr>
|
||||
<td>{{ k.replace('_', ' ').title() }}</td>
|
||||
<td>{{ v }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endif %}
|
||||
|
||||
{% if enrichment.raw_whois %}
|
||||
<h3>Raw WHOIS</h3>
|
||||
<pre class="code">{{ enrichment.raw_whois }}</pre>
|
||||
{% endif %}
|
||||
|
||||
<!-- GeoIP / IP-API -->
|
||||
{% if enrichment.geoip %}
|
||||
<h3>GeoIP</h3>
|
||||
{% for ip, info in enrichment.geoip.items() %}
|
||||
<details class="card" style="padding:0.5rem; margin-bottom:0.5rem;">
|
||||
<summary>{{ ip }}</summary>
|
||||
<table class="enrichment-table">
|
||||
<tbody>
|
||||
{% for key, val in info.items() %}
|
||||
<tr>
|
||||
<td>{{ key.replace('_', ' ').title() }}</td>
|
||||
<td>{{ val }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</details>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
<!-- BEC Words -->
|
||||
{% if enrichment.bec_words %}
|
||||
<h3>BEC Words Detected</h3>
|
||||
<table class="enrichment-table">
|
||||
<thead>
|
||||
<tr><th>Word</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for word in enrichment.bec_words %}
|
||||
<tr><td>{{ word }}</td></tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endif %}
|
||||
|
||||
{% if not enrichment.whois and not enrichment.raw_whois and not enrichment.geoip and not enrichment.bec_words %}
|
||||
<p>No enrichment data available.</p>
|
||||
{% endif %}
|
||||
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
<!-- Redirects -->
|
||||
<div class="card" id="redirects">
|
||||
<h2>Redirects</h2>
|
||||
{% if redirects %}
|
||||
<table class="enrichment-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<th>URL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for r in redirects %}
|
||||
<tr>
|
||||
<td>{{ r.status }}</td>
|
||||
<td><a href="{{ r.url }}" target="_blank">{{ r.url }}</a></td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% else %}
|
||||
<p>No redirects detected.</p>
|
||||
{% endif %}
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
<!-- Forms -->
|
||||
<div class="card" id="forms">
|
||||
<h2>Forms</h2>
|
||||
{% if forms %}
|
||||
{% for form in forms %}
|
||||
<details class="card {% if form.flagged %}flagged{% endif %}" style="padding:0.5rem; margin-bottom:0.5rem;">
|
||||
<summary>{{ form.status }} — Action: {{ form.action }} ({{ form.method | upper }})</summary>
|
||||
<table class="enrichment-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Input Name</th>
|
||||
<th>Type</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for inp in form.inputs %}
|
||||
<tr>
|
||||
<td>{{ inp.name }}</td>
|
||||
<td>{{ inp.type }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% if form.flagged %}
|
||||
<p><strong>Flag Reasons:</strong></p>
|
||||
<ul>
|
||||
{% for reason in form.flag_reasons %}
|
||||
<li>{{ reason }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</details>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<p>No forms detected.</p>
|
||||
{% endif %}
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
<!-- Suspicious Scripts -->
|
||||
<div class="card" id="scripts">
|
||||
<h2>Suspicious Scripts</h2>
|
||||
|
||||
{% if suspicious_scripts %}
|
||||
<table class="enrichment-table scripts-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Source URL</th>
|
||||
<th>Content Snippet</th>
|
||||
<th>Matches (Rules & Heuristics)</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for s in suspicious_scripts %}
|
||||
<tr>
|
||||
<!-- Type -->
|
||||
<td>{{ s.type or 'unknown' }}</td>
|
||||
|
||||
<!-- Source URL -->
|
||||
<td>
|
||||
{% if s.src %}
|
||||
<a href="{{ s.src }}" target="_blank">{{ s.src }}</a>
|
||||
{% else %}
|
||||
N/A
|
||||
{% endif %}
|
||||
</td>
|
||||
|
||||
<!-- Inline content snippet (collapsible) -->
|
||||
<td>
|
||||
{% if s.content_snippet %}
|
||||
<details>
|
||||
<summary>View snippet</summary>
|
||||
<pre class="code">{{ s.content_snippet }}</pre>
|
||||
</details>
|
||||
{% else %}
|
||||
N/A
|
||||
{% endif %}
|
||||
</td>
|
||||
|
||||
<!-- Rules & Heuristics -->
|
||||
<td>
|
||||
{% set has_rules = s.rules and s.rules|length > 0 %}
|
||||
{% set has_heur = s.heuristics and s.heuristics|length > 0 %}
|
||||
|
||||
{% if has_rules %}
|
||||
<strong>Rules</strong>
|
||||
<ul>
|
||||
{% for r in s.rules %}
|
||||
<li title="{{ r.description or '' }}">
|
||||
{{ r.name }}
|
||||
{% if r.description %}
|
||||
<small>— {{ r.description }}</small>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
|
||||
{% if has_heur %}
|
||||
<strong>Heuristics</strong>
|
||||
<ul>
|
||||
{% for h in s.heuristics %}
|
||||
<li>{{ h }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
|
||||
{% if not has_rules and not has_heur %}
|
||||
N/A
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% else %}
|
||||
<p>No suspicious scripts detected.</p>
|
||||
{% endif %}
|
||||
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
|
||||
<!-- Screenshot -->
|
||||
<div class="card" id="screenshot">
|
||||
<h2>Screenshot</h2>
|
||||
<img src="{{ url_for('main.artifacts', run_uuid=uuid, filename='screenshot.png') }}" alt="Screenshot">
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
<!-- Source -->
|
||||
<div class="card" id="source">
|
||||
<h2>Source</h2>
|
||||
<p><a href="{{ url_for('main.artifacts', run_uuid=uuid, filename='source.txt') }}" target="_blank">View Source</a></p>
|
||||
<p><a href="#top-jump-list">Back to top</a></p>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
128
app/utils/cache_db.py
Normal file
128
app/utils/cache_db.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import json
|
||||
import time
|
||||
import sqlite3
|
||||
import threading
|
||||
import functools
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
# ---------- SINGLETON DECORATOR ----------
|
||||
T = Any


def singleton_loader(func):
    """
    Decorator that runs ``func`` once and hands back that first result forever.

    The first call executes ``func`` with the arguments given and stores the
    result; every later call returns the stored object and ignores its own
    arguments. Creation and lookup both happen while holding a lock.
    """
    instances: dict[str, T] = {}
    guard = threading.Lock()

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        key = func.__name__
        with guard:
            # Fast exit once the instance has been built
            if key in instances:
                return instances[key]
            created = func(*args, **kwargs)
            instances[key] = created
            return created

    return wrapper
|
||||
|
||||
# ---------- CACHE CLASS ----------
|
||||
class CacheDB:
|
||||
"""SQLite-backed cache with expiration in minutes, CRUD, auto-cleanup, singleton support."""
|
||||
|
||||
TABLE_NAME = "cache"
|
||||
|
||||
def __init__(self, db_path: str | Path = "cache.db", default_expiration_minutes: int = 1440):
|
||||
"""
|
||||
:param default_expiration_minutes: default expiration in minutes (default 24 hours)
|
||||
"""
|
||||
self.db_path = Path(db_path)
|
||||
self.default_expiration = default_expiration_minutes * 60 # convert minutes -> seconds
|
||||
|
||||
self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
|
||||
self.conn.row_factory = sqlite3.Row
|
||||
self._lock = threading.Lock()
|
||||
self._create_table()
|
||||
|
||||
def _create_table(self):
|
||||
"""Create the cache table if it doesn't exist."""
|
||||
with self._lock:
|
||||
self.conn.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.TABLE_NAME} (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT,
|
||||
expires_at INTEGER
|
||||
)
|
||||
""")
|
||||
self.conn.commit()
|
||||
|
||||
def _cleanup_expired(self):
|
||||
"""Delete expired rows."""
|
||||
now = int(time.time())
|
||||
with self._lock:
|
||||
self.conn.execute(
|
||||
f"DELETE FROM {self.TABLE_NAME} WHERE expires_at IS NOT NULL AND expires_at < ?", (now,)
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
# ---------- CRUD ----------
|
||||
def create(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
|
||||
"""Insert or update a cache entry. expires_in_minutes overrides default expiration."""
|
||||
self._cleanup_expired()
|
||||
if expires_in_minutes is None:
|
||||
expires_in_seconds = self.default_expiration
|
||||
else:
|
||||
expires_in_seconds = expires_in_minutes * 60
|
||||
expires_at = int(time.time()) + expires_in_seconds
|
||||
|
||||
value_json = json.dumps(value)
|
||||
with self._lock:
|
||||
self.conn.execute(
|
||||
f"INSERT OR REPLACE INTO {self.TABLE_NAME} (key, value, expires_at) VALUES (?, ?, ?)",
|
||||
(key, value_json, expires_at)
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def read(self, key: str) -> Optional[Any]:
|
||||
"""Read a cache entry. Auto-cleans expired items."""
|
||||
self._cleanup_expired()
|
||||
with self._lock:
|
||||
row = self.conn.execute(
|
||||
f"SELECT * FROM {self.TABLE_NAME} WHERE key = ?", (key,)
|
||||
).fetchone()
|
||||
if not row:
|
||||
return None
|
||||
return json.loads(row["value"])
|
||||
|
||||
def update(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
|
||||
"""Update a cache entry. Optional expiration in minutes."""
|
||||
if expires_in_minutes is None:
|
||||
expires_in_seconds = self.default_expiration
|
||||
else:
|
||||
expires_in_seconds = expires_in_minutes * 60
|
||||
expires_at = int(time.time()) + expires_in_seconds
|
||||
|
||||
value_json = json.dumps(value)
|
||||
with self._lock:
|
||||
self.conn.execute(
|
||||
f"UPDATE {self.TABLE_NAME} SET value = ?, expires_at = ? WHERE key = ?",
|
||||
(value_json, expires_at, key)
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def delete(self, key: str):
|
||||
with self._lock:
|
||||
self.conn.execute(f"DELETE FROM {self.TABLE_NAME} WHERE key = ?", (key,))
|
||||
self.conn.commit()
|
||||
|
||||
def clear(self):
|
||||
"""Delete all rows from the cache table."""
|
||||
with self._lock:
|
||||
self.conn.execute(f"DELETE FROM {self.TABLE_NAME}")
|
||||
self.conn.commit()
|
||||
|
||||
def close(self):
|
||||
self.conn.close()
|
||||
|
||||
|
||||
# ---------- SINGLETON INSTANCE ----------
|
||||
@singleton_loader
def get_cache(db_path: str = "cache.db", default_expiration_minutes: int = 1440) -> CacheDB:
    """
    Return the shared CacheDB instance.

    Wrapped by ``singleton_loader``: the arguments of the first call build the
    instance; later calls return that same object whatever they pass.
    """
    shared_cache = CacheDB(
        db_path=db_path,
        default_expiration_minutes=default_expiration_minutes,
    )
    return shared_cache
|
||||
115
app/utils/io_helpers.py
Normal file
115
app/utils/io_helpers.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
||||
|
||||
def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
|
||||
"""Write content to a file safely with logging."""
|
||||
path = Path(path)
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(path, mode, encoding=encoding) as f:
|
||||
f.write(content)
|
||||
logging.info(f"[+] Wrote file: {path}")
|
||||
except Exception as e:
|
||||
logging.error(f"[!] Failed writing {path}: {e}")
|
||||
raise
|
||||
|
||||
def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
    """
    List the most recently written analysis runs found under ``storage_dir``.

    Every direct sub-directory that contains a ``results.json`` counts as a
    run. Runs are ordered by that file's modification time, newest first, and
    at most ``limit`` of them are returned. ``storage_dir`` is created when it
    does not exist yet.

    Args:
        storage_dir (Path): Base path where run directories live.
        limit (int): Maximum number of entries to return.
        logger: Logger used for non-fatal problems; may be falsy to disable logging.

    Returns:
        list[dict]: One dict per run with the keys
            "uuid" (directory name),
            "submitted_url" / "final_url" (from results.json, None when the
            file cannot be parsed or is not a JSON object),
            "timestamp" (results.json mtime, ISO 8601 to the second).
        An empty list when nothing is found or an unexpected error occurs.
    """
    try:
        # Make sure there is a directory to scan
        storage_dir.mkdir(parents=True, exist_ok=True)

        dated_runs = []  # (mtime_epoch, summary) pairs
        for run_dir in storage_dir.iterdir():
            try:
                summary_file = run_dir / "results.json"
                # Only directories that actually hold a results.json are runs
                if not (run_dir.is_dir() and summary_file.exists()):
                    continue

                modified_at = summary_file.stat().st_mtime
                summary = {
                    "uuid": run_dir.name,
                    "submitted_url": None,
                    "final_url": None,
                    "timestamp": datetime.fromtimestamp(modified_at).isoformat(timespec="seconds"),
                }

                # Best effort: an unreadable file still yields an entry, just without URLs
                try:
                    with open(summary_file, "r", encoding="utf-8") as fh:
                        payload = json.load(fh)
                    if isinstance(payload, dict):
                        summary["submitted_url"] = payload.get("submitted_url")
                        summary["final_url"] = payload.get("final_url")
                except Exception as read_err:
                    if logger:
                        logger.warning(f"[recent] Failed reading {summary_file}: {read_err}")

                dated_runs.append((modified_at, summary))
            except Exception as inner_err:
                # One broken folder must not hide the remaining runs
                if logger:
                    logger.warning(f"[recent] Skipping {run_dir}: {inner_err}")

        # Newest first
        try:
            dated_runs.sort(key=lambda pair: pair[0], reverse=True)
        except Exception as sort_err:
            if logger:
                logger.warning(f"[recent] Sort failed: {sort_err}")

        # Keep the first `limit` summaries and drop the sort keys
        return [
            summary
            for position, (_, summary) in enumerate(dated_runs)
            if position < limit
        ]

    except Exception as outer_err:
        if logger:
            logger.error(f"[recent] Unexpected error while scanning {storage_dir}: {outer_err}")
        return []
|
||||
132
app/utils/rules_engine.py
Normal file
132
app/utils/rules_engine.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
rules_engine.py
|
||||
|
||||
A flexible rule-based engine for detecting suspicious patterns in scripts, forms,
|
||||
or other web artifacts inside SneakyScope.
|
||||
|
||||
Each rule is defined as:
|
||||
- name: str # Rule identifier
|
||||
- description: str # Human-readable reason for analysts
|
||||
- category: str # e.g., 'script', 'form', 'text', 'generic'
|
||||
- type: str # 'regex' or 'function'
|
||||
- pattern: str # Regex pattern (if type=regex)
|
||||
- function: callable # Python function returning (bool, str) (if type=function)
|
||||
|
||||
The framework returns a list of results, with pass/fail and reasoning.
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Tuple, Union
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
class Rule:
    """
    Represents a single detection rule.

    A rule is either a case-insensitive regex (``rule_type="regex"`` with
    ``pattern``) or a Python callable (``rule_type="function"`` with
    ``function``) that receives the text and returns ``(matched, reason)``.
    """

    def __init__(
        self,
        name: str,
        description: str,
        category: str,
        rule_type: str = "regex",
        pattern: str = None,
        function: Callable = None,
    ):
        self.name = name                # Rule identifier
        self.description = description  # Human-readable reason for analysts
        self.category = category        # Used by callers to select rules
        self.rule_type = rule_type      # "regex" or "function"
        self.pattern = pattern          # Regex source (regex rules only)
        self.function = function        # Callable (function rules only)

    def run(self, text: str) -> Tuple[bool, str]:
        """
        Run the rule on given text.

        For regex rules, ``None`` text is treated as empty and a pattern that
        fails to compile is reported as a non-match rather than raised, so a
        single bad rule cannot abort a whole rule run.

        Returns:
            (matched: bool, reason: str)
        """
        if self.rule_type == "regex" and self.pattern:
            haystack = "" if text is None else text
            try:
                found = re.search(self.pattern, haystack, re.IGNORECASE)
            except re.error as exc:
                # Patterns come from configuration; a typo there is not fatal
                return False, f"Invalid regex '{self.pattern}': {exc}"
            if found:
                return True, f"Matched regex '{self.pattern}' → {self.description}"
            return False, "No match"

        if self.rule_type == "function" and callable(self.function):
            # Function rules get the text exactly as passed in
            return self.function(text)

        return False, "Invalid rule configuration"
|
||||
|
||||
|
||||
class RuleEngine:
    """Holds a list of rules and evaluates them against a piece of text."""

    def __init__(self, rules: List[Rule] = None):
        # A falsy argument (None or an empty list) starts a fresh list
        self.rules = rules if rules else []

    def add_rule(self, rule: Rule):
        """Register an additional rule after construction."""
        self.rules.append(rule)

    def run_all(self, text: str, category: str = None) -> List[Dict]:
        """
        Evaluate the registered rules against ``text``.

        Args:
            text: str → the content to test
            category: str → when given, rules of any other category are skipped

        Returns:
            One dict per evaluated rule with the keys "rule", "category",
            "matched" and "reason" ("reason" is None for non-matches).
        """
        report = []
        for rule in self.rules:
            wanted = (not category) or rule.category == category
            if wanted:
                outcome = rule.run(text)
                matched, reason = outcome
                entry = {
                    "rule": rule.name,
                    "category": rule.category,
                    "matched": matched,
                    "reason": reason if matched else None,
                }
                report.append(entry)
        return report
|
||||
|
||||
|
||||
def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
    """
    Load rules from a YAML file.

    Example YAML format:
    - name: suspicious_eval
      description: "Use of eval() in script"
      category: script
      type: regex
      pattern: "\\beval\\("

    - name: password_reset
      description: "Password reset wording"
      category: text
      type: regex
      pattern: "reset password"

    Args:
        yaml_file: Path of the YAML file to read (UTF-8).

    Returns:
        One Rule per list entry, in file order. An empty file yields an
        empty list.

    Raises:
        OSError: If the file cannot be opened.
        TypeError: If the top-level YAML value is not a list.
        KeyError: If an entry lacks "name", "description" or "category".
    """
    with open(yaml_file, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # An empty (or comment-only) document parses to None; treat it as
    # "no rules" instead of failing on iteration.
    if data is None:
        return []

    if not isinstance(data, list):
        raise TypeError(
            f"Rules file {yaml_file} must contain a list of rules, "
            f"got {type(data).__name__}"
        )

    # Only regex-style fields are read from YAML: "type" defaults to
    # "regex", "pattern" may be absent, and no callable is ever supplied.
    return [
        Rule(
            name=item["name"],
            description=item["description"],
            category=item["category"],
            rule_type=item.get("type", "regex"),
            pattern=item.get("pattern"),
        )
        for item in data
    ]
|
||||
144
app/utils/settings.py
Normal file
144
app/utils/settings.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#
|
||||
# Note: the settings file path is hard-coded in this module, just below the imports.
|
||||
#
|
||||
# To make a new settings section, just add the setting dict to your yaml
|
||||
# and then define the data class below in the config data classes area.
|
||||
#
|
||||
# Example use from anywhere - this will always return the same singleton
|
||||
# from settings import get_settings
|
||||
# def main():
|
||||
# settings = get_settings()
|
||||
# print(settings.database.host) # Autocomplete works
|
||||
# print(settings.logging.level)
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
||||
import functools
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, TypeVar
|
||||
from dataclasses import dataclass, fields, is_dataclass, field, MISSING
|
||||
|
||||
import logging
|
||||
import sys
|
||||
logger = logging.getLogger(__file__)
|
||||
|
||||
# PyYAML is required to parse the settings file; fail fast with a readable
# message if it is not installed.
try:
    import yaml
except ModuleNotFoundError:
    msg = (
        "Required modules are not installed. "
        "Can not continue with module / application loading.\n"
        "Install it with: pip install -r requirements"
    )
    print(msg, file=sys.stderr)
    logger.error(msg)
    # Use sys.exit with a non-zero status: the bare exit() helper is
    # injected by the `site` module (not guaranteed to exist) and, called
    # without arguments, would report success to the parent process.
    sys.exit(1)
|
||||
|
||||
# Directory two levels above this file (for app/utils/settings.py this is
# the app/ package directory), resolved to an absolute path.
BASE_DIR = Path(__file__).resolve().parent.parent
# Settings file used by get_settings() when no explicit path is supplied.
DEFAULT_SETTINGS_FILE = BASE_DIR / "config" / "settings.yaml"
|
||||
|
||||
# ---------- CONFIG DATA CLASSES ----------
|
||||
@dataclass
class Cache_Config:
    """
    Values of the `cache` section of the settings file.

    Each field's default is used when the YAML file omits it.
    """

    # presumably the number of days a WHOIS lookup stays cached — confirm with the consumer
    whois_cache_days: int = 7
    # presumably the number of days a GeoIP lookup stays cached — confirm with the consumer
    geoip_cache_days: int = 7
    # presumably how many recent runs are kept/listed — confirm with the consumer
    recent_runs_count: int = 10
|
||||
|
||||
|
||||
@dataclass
class AppConfig:
    """
    Values of the `app` section of the settings file.

    Each field's default is used when the YAML file omits it.
    """

    # Application name.
    name: str = "MyApp"
    # Major and minor components of the application version.
    version_major: int = 1
    version_minor: int = 0
|
||||
|
||||
|
||||
@dataclass
class Settings:
    """
    Root settings object; each field maps to a top-level section of the
    settings YAML file and falls back to its dataclass defaults.
    """

    # `cache:` section.
    cache: Cache_Config = field(default_factory=Cache_Config)
    # `app:` section.
    app: AppConfig = field(default_factory=AppConfig)

    @classmethod
    def from_yaml(cls, path: str | Path) -> "Settings":
        """
        Load settings from a YAML file into a Settings object.

        Missing files, empty files, missing sections and missing keys all
        fall back to the dataclass defaults. Keys that a section's
        dataclass does not declare are ignored with a warning.

        Args:
            path: Location of the YAML settings file.

        Returns:
            A fully populated Settings instance.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                # An empty document parses to None; normalise to {}.
                raw: Any = yaml.safe_load(f) or {}
        except FileNotFoundError:
            logger.warning(f"Settings file {path} not found! Using default settings.")
            raw = {}

        # A list or scalar at the top level cannot be mapped onto sections.
        if not isinstance(raw, dict):
            logger.warning(
                "Settings file %s does not contain a mapping at the top level! "
                "Using default settings.",
                path,
            )
            raw = {}

        init_kwargs = {}
        for f_def in fields(cls):
            yaml_value = raw.get(f_def.name)

            # Determine default value from default_factory or default
            if f_def.default_factory is not MISSING:
                default_value = f_def.default_factory()
            elif f_def.default is not MISSING:
                default_value = f_def.default
            else:
                default_value = None

            if not is_dataclass(f_def.type):
                # Plain field: YAML value wins unless absent/null.
                init_kwargs[f_def.name] = (
                    yaml_value if yaml_value is not None else default_value
                )
                continue

            if not isinstance(yaml_value, dict):
                # Section absent or not a mapping: keep the defaults.
                init_kwargs[f_def.name] = default_value
                continue

            # Nested dataclass: start from the defaults, then overlay the
            # YAML values for the keys the dataclass actually declares.
            known = [fld.name for fld in fields(f_def.type)]
            unknown = [key for key in yaml_value if key not in known]
            if unknown:
                # Passing these through would make the dataclass
                # constructor raise TypeError on a simple typo.
                logger.warning(
                    "Ignoring unknown keys %s in settings section '%s'.",
                    unknown,
                    f_def.name,
                )
            merged_data = {key: getattr(default_value, key) for key in known}
            merged_data.update(
                (key, value) for key, value in yaml_value.items() if key in known
            )
            init_kwargs[f_def.name] = f_def.type(**merged_data)

        return cls(**init_kwargs)
|
||||
|
||||
|
||||
# ---------- SINGLETON DECORATOR ----------
|
||||
T = TypeVar("T")


def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
    """
    Wrap *func* so that it executes at most once.

    The first call runs *func* with the given arguments and stores the
    result; every later call returns that stored result and ignores its
    own arguments. A call that raises stores nothing.
    """
    memo: dict[str, T] = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        key = func.__name__
        if key in memo:
            return memo[key]
        result = func(*args, **kwargs)
        memo[key] = result
        return result

    return wrapper
|
||||
|
||||
|
||||
# ---------- SINGLETON DECORATOR ----------
|
||||
# NOTE(review): this TypeVar and the function below repeat, line for line,
# the definitions that appear earlier in this module; this later copy is
# the one the name ends up bound to.
T = TypeVar("T")


def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
    """
    Decorator that makes *func* run only once.

    The value produced by the first call is kept and handed back on every
    subsequent call, whatever arguments that later call passes.
    """
    stored: dict[str, T] = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        already_loaded = func.__name__ in stored
        if not already_loaded:
            stored[func.__name__] = func(*args, **kwargs)
        return stored[func.__name__]

    return wrapper
|
||||
|
||||
|
||||
@singleton_loader
def get_settings(config_path: str | Path | None = None) -> Settings:
    """
    Return the singleton Settings instance.

    Because of the singleton decorator, the settings are loaded on the
    first call only; later calls return that same object and their
    config_path argument is not used.

    Args:
        config_path: Optional path to the YAML config file. If not provided,
            DEFAULT_SETTINGS_FILE is used ('config/settings.yaml' under
            BASE_DIR, which is derived from this module's location rather
            than from the current working directory).
    """
    settings_file = DEFAULT_SETTINGS_FILE if config_path is None else Path(config_path)
    return Settings.from_yaml(settings_file)
|
||||
10
app/wsgi.py
Normal file
10
app/wsgi.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
app/wsgi.py
|
||||
|
||||
Gunicorn entrypoint for SneakyScope.
|
||||
"""
|
||||
|
||||
from . import create_app
|
||||
|
||||
# Gunicorn will look for "app"
# Module-level WSGI callable: create_app() runs once, when this module is
# imported.
app = create_app()
|
||||
Reference in New Issue
Block a user