first commit
This commit is contained in:
10
.env.example
Normal file
10
.env.example
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Flask Configuration
|
||||||
|
FLASK_ENV=production
|
||||||
|
SECRET_KEY=changeme_super_long_random_secret
|
||||||
|
PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
# Playwright (browser automation)
|
||||||
|
PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||||
|
|
||||||
|
# Sandbox Storage
|
||||||
|
SANDBOX_STORAGE=/data
|
||||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
.env
|
||||||
|
/data/
|
||||||
34
Dockerfile
Normal file
34
Dockerfile
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Use the official Playwright image with browsers preinstalled
|
||||||
|
FROM mcr.microsoft.com/playwright/python:v1.45.0-jammy
|
||||||
|
|
||||||
|
# Create a non-root user (the base image already has pwuser, we'll keep it)
|
||||||
|
USER root
|
||||||
|
|
||||||
|
# System deps (whois, dig, etc. — handy for later stages)
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends \
|
||||||
|
whois dnsutils iputils-ping ca-certificates \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy requirements first to leverage Docker layer caching
|
||||||
|
COPY requirements.txt ./
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code (the repeated "app" is intentional: the local app/ package must end up at /app/app/ inside the image)
|
||||||
|
COPY app/ /app/app/
|
||||||
|
|
||||||
|
COPY entrypoint.sh ./entrypoint.sh
|
||||||
|
RUN chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
# Create data dir for screenshots/artifacts
|
||||||
|
RUN mkdir -p /data && chown -R pwuser:pwuser /data /app
|
||||||
|
|
||||||
|
USER pwuser
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
# Start server
|
||||||
|
ENTRYPOINT ["/app/entrypoint.sh"]
|
||||||
92
Readme.md
Normal file
92
Readme.md
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# URL Sandbox
|
||||||
|
|
||||||
|
A lightweight web-based sandbox for analyzing websites and domains.
|
||||||
|
It performs WHOIS lookups, GeoIP enrichment, script/form inspection, and provides analyst-friendly output.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 Features
|
||||||
|
|
||||||
|
- **Domain & IP Enrichment**
|
||||||
|
- WHOIS lookups with fallback to raw text when fields are missing
|
||||||
|
- Explicit handling of privacy-protected WHOIS records (`N/A` or `Possible Privacy`)
|
||||||
|
- GeoIP (City, Region, Country, Latitude/Longitude)
|
||||||
|
- ASN, ISP, and network details
|
||||||
|
- **Flagged Content Analysis**
|
||||||
|
- Suspicious script detection
|
||||||
|
- Suspicious form detection
|
||||||
|
- Nested bullet-style reporting for clarity
|
||||||
|
- **Improved UX**
|
||||||
|
- Automatic addition of `http://`, `https://`, and `www.` if only a domain is provided
|
||||||
|
- Modal spinner to indicate background analysis (`Analyzing website…`)
|
||||||
|
- **Resilient GeoLite2 Database Management**
|
||||||
|
- Downloads the MaxMind GeoLite2-City database on first startup
|
||||||
|
- Checks file age and only re-downloads if older than **14 days** (configurable via environment variable)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ⚙️ Setup Instructions
|
||||||
|
|
||||||
|
### 1. Clone the Repository
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/yourusername/url-sandbox.git
|
||||||
|
cd url-sandbox
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Create a MaxMind Account & License Key
|
||||||
|
1. Go to [MaxMind GeoLite2](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data)
|
||||||
|
2. Sign up for a free account
|
||||||
|
3. Navigate to **Account > Manage License Keys**
|
||||||
|
4. Generate a new license key
|
||||||
|
|
||||||
|
### 3. Configure Environment Variables
|
||||||
|
All environment variables are loaded from a `.env` file.
|
||||||
|
|
||||||
|
1. Copy the sample file:
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Edit `.env` and set your values (see [`.env.example`](./.env.example) for available options).
|
||||||
|
|
||||||
|
Make sure to add your **MaxMind License Key** under `MAXMIND_LICENSE_KEY`.
|
||||||
|
|
||||||
|
|
||||||
|
### 4. Run with Docker Compose
|
||||||
|
```bash
|
||||||
|
docker-compose up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
This will:
|
||||||
|
- Build the app
|
||||||
|
- Download the GeoLite2 database if not present or too old
|
||||||
|
- Start the web interface
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📝 Example Output
|
||||||
|
|
||||||
|
**WHOIS Info**
|
||||||
|
- Registrar: MarkMonitor, Inc.
|
||||||
|
- Organization: Possible Privacy
|
||||||
|
- Creation: 1997-09-15
|
||||||
|
- Expiration: 2028-09-14
|
||||||
|
|
||||||
|
**GeoIP Info**
|
||||||
|
- IP: 172.66.159.20
|
||||||
|
- City: N/A
|
||||||
|
- Region: N/A
|
||||||
|
- Country: United States
|
||||||
|
- Coordinates: (37.751, -97.822)
|
||||||
|
- ASN: 13335
|
||||||
|
- ISP: Cloudflare, Inc.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📌 Roadmap
|
||||||
|
See [Next Steps Checklist](docs/roadmap.md) for planned features:
|
||||||
|
- Improved UI templates
|
||||||
|
- Artifact cleanup
|
||||||
|
- Proxy support (optional)
|
||||||
|
|
||||||
|
---
|
||||||
82
app/__init__.py
Normal file
82
app/__init__.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
"""
|
||||||
|
app/__init__.py
|
||||||
|
|
||||||
|
Application factory and startup hooks for SneakyScope.
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
- Create the Flask app.
|
||||||
|
- Load settings (YAML -> dataclasses) with safe defaults.
|
||||||
|
- Initialize and load the Suspicious Rules Engine from YAML.
|
||||||
|
- Register blueprints (routes).
|
||||||
|
- Configure core paths (e.g., SANDBOX_STORAGE).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from flask import Flask
|
||||||
|
|
||||||
|
# Local imports
|
||||||
|
from .utils.settings import get_settings
|
||||||
|
from .utils import io_helpers # if you need logging/setup later
|
||||||
|
from .utils import cache_db # available for future injections
|
||||||
|
from .utils.rules_engine import RuleEngine, load_rules_from_yaml # rules engine
|
||||||
|
from . import routes # blueprint
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> Flask:
    """
    Create and configure the Flask application instance.

    Responsibilities:
    - Load settings (YAML -> dataclasses) with safe defaults.
    - Configure the secret key and the sandbox storage path.
    - Initialize the Suspicious Rules Engine from YAML (if present).
    - Register blueprints (routes).

    Returns:
        Flask: The configured Flask app.
    """
    # Basic app object
    app = Flask(__name__, template_folder="templates", static_folder="static")

    # Load settings (safe fallback to defaults if file missing)
    settings = get_settings()

    # Secret key loaded from env. If SECRET_KEY is unset, fall back to a
    # random ephemeral key so the app still boots with working sessions,
    # and log loudly (sessions will not survive restarts without a real key).
    secret_key = os.getenv("SECRET_KEY")
    if not secret_key:
        app.logger.warning("[!] SECRET_KEY not set; using a random ephemeral key.")
        secret_key = os.urandom(32).hex()
    app.secret_key = secret_key

    # Configure storage directory (bind-mount is still handled by sandbox.sh)
    sandbox_storage_default = Path("/data")
    app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)

    # Initialize Suspicious Rules Engine at startup.
    # Determine rules file path relative to this package.
    base_dir = Path(__file__).resolve().parent
    rules_path = base_dir / "config" / "suspicious_rules.yaml"

    # Create an engine instance (even if file missing, we still want an engine)
    engine = RuleEngine()

    # Try to load from YAML if present; log clearly if not
    if rules_path.exists():
        try:
            loaded_rules = load_rules_from_yaml(rules_path)
            # Add rules one-by-one (explicit)
            for rule in loaded_rules:
                engine.add_rule(rule)
            app.logger.info(f"[+] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
        except Exception as e:
            app.logger.warning(f"[!] Failed loading rules from {rules_path}: {e}")
    else:
        app.logger.warning(f"[!] Rules file not found at {rules_path}. Engine will start with zero rules.")

    # Store engine on app config so it is accessible via current_app
    app.config["RULE_ENGINE"] = engine

    # Make app name/version available for templates here if you want it globally
    app.config["APP_NAME"] = settings.app.name
    app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"

    # Register blueprints
    app.register_blueprint(routes.bp)

    # Example log line so we know we booted cleanly
    app.logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
    app.logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")

    return app
|
||||||
400
app/browser.py
Normal file
400
app/browser.py
Normal file
@@ -0,0 +1,400 @@
|
|||||||
|
import re
|
||||||
|
import uuid
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
from playwright.async_api import async_playwright, TimeoutError as PWTimeoutError
|
||||||
|
|
||||||
|
from flask import current_app # access the rule engine from app config
|
||||||
|
|
||||||
|
from app.utils.io_helpers import safe_write
|
||||||
|
from .enrichment import enrich_url
|
||||||
|
|
||||||
|
def get_rule_engine():
    """
    Fetch the configured RuleEngine from the active Flask application.

    Returns:
        RuleEngine or None: the engine stored under app.config["RULE_ENGINE"],
        or None when no engine is configured or no app context is active.
    """
    try:
        # current_app resolves only inside an active Flask context;
        # anything else (missing context, missing key) degrades to None.
        return current_app.config.get("RULE_ENGINE")
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def run_rule_checks(text, category):
    """
    Run all rules for a given category against the provided text.

    Args:
        text (str): The content to test (e.g., form snippet, inline JS).
        category (str): The rule category to run (e.g., 'form' or 'script').

    Returns:
        dict: {
            "checks": [ { "rule": str, "category": str, "matched": bool, "reason": Optional[str] }, ... ],
            "summary": { "matched_count": int, "total_rules": int }
        }
    """
    outcome = {
        "checks": [],
        "summary": {"matched_count": 0, "total_rules": 0},
    }

    engine = get_rule_engine()
    if engine is None:
        # No engine configured; hand back the empty but well-formed structure.
        return outcome

    try:
        # Run engine rules for the specified category.
        raw_results = engine.run_all(text, category=category)

        # Normalize each result into the documented shape while counting hits.
        hit_count = 0
        for entry in raw_results:
            # entry is expected to contain: rule, category, matched, reason (optional)
            was_hit = bool(entry.get("matched"))
            if was_hit:
                hit_count += 1
            outcome["checks"].append({
                "rule": entry.get("rule"),
                "category": entry.get("category"),
                "matched": was_hit,
                "reason": entry.get("reason"),
            })

        outcome["summary"]["matched_count"] = hit_count
        outcome["summary"]["total_rules"] = len(outcome["checks"])

    except Exception as e:
        # Keep the structure intact and surface the failure as a pseudo-check.
        outcome["checks"].append({
            "rule": "engine_error",
            "category": category,
            "matched": False,
            "reason": f"Rule engine error: {e}",
        })
        outcome["summary"]["matched_count"] = 0
        outcome["summary"]["total_rules"] = 0

    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_forms(html: str, base_url: str):
    """
    Parse forms from the page HTML and apply heuristic flags and rule-based checks.

    Args:
        html (str): The full page HTML.
        base_url (str): The final URL of the page (used for hostname comparisons).

    Returns:
        list[dict]: A list of form analysis dictionaries, each including:
            - action, method, inputs
            - flagged (bool), flag_reasons (list[str]), status (str)
            - rule_checks: dict with "checks" (list) and "summary" (dict)
    """
    soup = BeautifulSoup(html, "lxml")
    forms_info = []
    # Hostname of the analyzed page, used to detect cross-host form submission.
    page_hostname = urlparse(base_url).hostname

    for form in soup.find_all("form"):
        action = form.get("action")
        # HTML default method is GET when the attribute is absent.
        method = form.get("method", "get").lower()

        # Build explicit inputs list (name/type per <input> element).
        inputs = []
        for inp in form.find_all("input"):
            input_name = inp.get("name")
            input_type = inp.get("type", "text")
            inputs.append({
                "name": input_name,
                "type": input_type
            })

        flagged_reasons = []

        # Heuristic 1: no action specified (form posts back to itself —
        # common in credential-harvesting pages driven by JS).
        if not action or str(action).strip() == "":
            flagged_reasons.append("No action specified")

        # Heuristic 2: external host. Relative actions ("/...") are exempt.
        else:
            try:
                action_host = urlparse(action).hostname
                if not str(action).startswith("/") and action_host != page_hostname:
                    flagged_reasons.append("Submits to a different host")
            except Exception:
                # If hostname parsing fails, skip this condition quietly
                pass

        # Heuristic 3: HTTP form action on an HTTPS page (credentials would
        # downgrade to cleartext). urlparse(None) raises and is ignored here.
        try:
            if urlparse(action).scheme == "http" and urlparse(base_url).scheme == "https":
                flagged_reasons.append("Submits over insecure HTTP")
        except Exception:
            # If scheme parsing fails, ignore
            pass

        # Heuristic 4: hidden inputs whose name mentions "password".
        # NOTE(review): appends once per matching hidden field, so the reason
        # may appear multiple times for one form — confirm that is intended.
        for hidden in form.find_all("input", type="hidden"):
            name_value = hidden.get("name") or ""
            if "password" in name_value.lower():
                flagged_reasons.append("Hidden password field")

        flagged = bool(flagged_reasons)

        # Serialize a simple form snippet for the rules engine to analyze (category='form')
        snippet_lines = []
        snippet_lines.append(f"action={action}")
        snippet_lines.append(f"method={method}")
        snippet_lines.append("inputs=")
        for item in inputs:
            snippet_lines.append(f"  - name={item.get('name')} type={item.get('type')}")
        form_snippet = "\n".join(snippet_lines)

        rule_checks = run_rule_checks(form_snippet, category="form")

        forms_info.append({
            "action": action,
            "method": method,
            "inputs": inputs,
            "flagged": flagged,
            "flag_reasons": flagged_reasons,
            "status": "flagged" if flagged else "possibly safe",
            "rule_checks": rule_checks
        })

    return forms_info
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_scripts(html: str, base_url: str = "", engine=None) -> list[dict]:
    """
    Analyze <script> elements using the RuleEngine (if provided) and
    lightweight built-in heuristics. Only append a record when at least
    one rule or heuristic matches, and always set a sensible 'type'.

    Args:
        html (str): The full page HTML.
        base_url (str): Final page URL, used to detect third-party script hosts.
        engine: Optional rules engine exposing evaluate_script() or evaluate().

    Returns list of dicts like:
        {
            "type": "external" | "inline" | "unknown",
            "src": "...",              # for external
            "content_snippet": "...",  # for inline
            "rules": [ { "name": "...", "description": "..." }, ... ],
            "heuristics": [ "reason1", "reason2", ... ]
        }
    """
    soup = BeautifulSoup(html, "lxml")
    results: list[dict] = []

    # NOTE(review): re and urlparse are already imported at module level;
    # these local imports are redundant but harmless.
    import re
    from urllib.parse import urlparse

    # Benign MIME types we ignore entirely (structured data, not executable JS).
    benign_types = {"application/ld+json", "application/json"}

    # Suspicious file extensions for external scripts.
    dangerous_ext = (".vbs", ".hta")

    # Inline red flags: (compiled pattern, human-readable reason) pairs.
    risky_inline_patterns = [
        (re.compile(r"\beval\s*\(", re.IGNORECASE), "Uses eval()"),
        (re.compile(r"\bnew\s+Function\s*\(", re.IGNORECASE), "Uses Function constructor"),
        (re.compile(r"\bdocument\.write\s*\(", re.IGNORECASE), "Uses document.write()"),
        (re.compile(r"\bActiveXObject\s*\(", re.IGNORECASE), "Uses ActiveXObject (IE-only)"),
        (re.compile(r"\batob\s*\(", re.IGNORECASE), "Uses atob() (possible obfuscation)"),
        (re.compile(r"\bunescape\s*\(", re.IGNORECASE), "Uses unescape() (legacy/obfuscation)"),
        (re.compile(r"\bset(?:Timeout|Interval)\s*\(\s*['\"`].+['\"`]\s*,", re.IGNORECASE),
         "String passed to setTimeout/setInterval"),
        (re.compile(r"[\"']?0x[0-9a-fA-F]{16,}[\"']?", re.IGNORECASE),
         "Contains long hex-like constants (possible obfuscation)"),
    ]

    base_host = urlparse(base_url).hostname or ""

    for script in soup.find_all("script"):
        try:
            src = (script.get("src") or "").strip()
            s_type_attr = (script.get("type") or "").strip().lower()

            # IMPORTANT: .string is often None; get_text() is reliable
            inline_text = script.get_text(strip=True) or ""

            # Skip benign structured data outright
            if s_type_attr in benign_types:
                continue

            # ---- Build facts for the rules engine
            facts = {
                "script_type_attr": s_type_attr or None,
                "has_src": bool(src),
                "src": src or None,
                "attrs": dict(script.attrs),
                "inline_len": len(inline_text),
                "inline_preview": inline_text[:200].replace("\n", " ") if inline_text else None,
                "base_url": base_url or None,
                "base_hostname": base_host or None,
                "src_hostname": urlparse(src).hostname if src else None,
            }

            # ---- Evaluate rules engine (using name/description).
            # Supports two engine APIs: evaluate_script() preferred, evaluate()
            # as fallback; anything else yields no engine matches.
            engine_matches: list[dict] = []
            if engine is not None:
                try:
                    if hasattr(engine, "evaluate_script"):
                        matches = engine.evaluate_script(facts)
                    elif hasattr(engine, "evaluate"):
                        matches = engine.evaluate(facts)
                    else:
                        matches = []

                    # Normalize: dicts keep name/description; bare strings
                    # become {"name": ..., "description": ""}.
                    if isinstance(matches, list):
                        for m in matches:
                            if isinstance(m, dict) and "name" in m:
                                engine_matches.append({
                                    "name": m["name"],
                                    "description": m.get("description", "")
                                })
                            elif isinstance(m, str):
                                engine_matches.append({"name": m, "description": ""})
                except Exception as e:
                    # Engine failure is reported as a pseudo-match, not raised.
                    engine_matches.append({"name": "Rules Engine Error", "description": str(e)})

            # ---- Built-in heuristics
            heuristics: list[str] = []
            if src:
                # Unusual URL schemes for script sources
                if src.startswith(("data:", "blob:")):
                    heuristics.append("Script src uses data:/blob: URL")
                # Dangerous extensions
                for ext in dangerous_ext:
                    if src.lower().endswith(ext):
                        heuristics.append(f"External script with dangerous extension ({ext.lstrip('.')})")
                        break
                # Third-party host hint
                src_host = facts.get("src_hostname") or ""
                if base_host and src_host and src_host != base_host:
                    heuristics.append(f"Third-party host: {src_host}")
            else:
                # Inline script: run each risky pattern against its body.
                if inline_text:
                    for pat, why in risky_inline_patterns:
                        if pat.search(inline_text):
                            heuristics.append(why)

            # ---- Only append when something matched; always set type
            if engine_matches or heuristics:
                record: dict = {}

                if src:
                    record["type"] = "external"
                    record["src"] = src
                elif inline_text:
                    record["type"] = "inline"
                    record["content_snippet"] = facts.get("inline_preview")
                else:
                    record["type"] = "unknown"

                if engine_matches:
                    record["rules"] = engine_matches
                if heuristics:
                    record["heuristics"] = heuristics

                results.append(record)

        except Exception as e:
            # Never let a single broken <script> kill the whole analysis
            results.append({
                "type": "unknown",
                "heuristics": [f"Script analysis error: {e}"]
            })

    return results
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_page_artifacts(url: str, storage_dir: Path, engine=None) -> Dict[str, Any]:
    """
    Fetch page artifacts and save them in a UUID-based directory.

    Drives a headless Chromium session via Playwright, captures a full-page
    screenshot and the page source, then runs form/script analysis and
    enrichment over the results. Artifacts land in <storage_dir>/<uuid>/.

    Args:
        url (str): URL to analyze.
        storage_dir (Path): Base /data path.
        engine: Optional rules engine instance (from app.config["RULE_ENGINE"]).

    Returns:
        Dict[str, Any]: Analysis result (also written to results.json).
    """
    # Each run gets its own directory keyed by a fresh UUID.
    run_uuid = str(uuid.uuid4())
    run_dir = storage_dir / run_uuid
    run_dir.mkdir(parents=True, exist_ok=True)

    screenshot_path = run_dir / "screenshot.png"
    source_path = run_dir / "source.txt"
    results_path = run_dir / "results.json"

    # Collected by page event handlers during navigation.
    redirects = []
    downloads = []
    scripts = []

    async with async_playwright() as pw:
        browser = await pw.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage", "--disable-blink-features=AutomationControlled"]
        )
        # Desktop-like context: fixed viewport, common Chrome UA, JS enabled.
        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
            java_script_enabled=True,
            locale="en-US"
        )
        page = await context.new_page()

        # Event handlers: record 3xx responses, downloads, and requests for
        # script-like resources (.js/.vbs/.hta) as they happen.
        page.on("response", lambda resp: redirects.append({"status": resp.status, "url": resp.url}) if 300 <= resp.status <= 399 else None)
        page.on("download", lambda d: downloads.append({"url": d.url, "suggested_filename": d.suggested_filename}))
        page.on("request", lambda r: scripts.append(r.url) if r.url.endswith((".js", ".vbs", ".hta")) else None)

        try:
            # Wait for network idle (up to 60s) so dynamic content settles.
            await page.goto(url, wait_until="networkidle", timeout=60000)
            final_url = page.url
            await page.screenshot(path=str(screenshot_path), full_page=True)
            html = await page.content()
            safe_write(source_path, html)
        except PWTimeoutError:
            # On timeout still capture whatever rendered; source.txt records
            # a placeholder message instead of the page HTML.
            final_url = page.url
            safe_write(source_path, "Page did not fully load (timeout)")
            await page.screenshot(path=str(screenshot_path), full_page=True)

        await context.close()
        await browser.close()

    # Analyze the saved source (may be the timeout placeholder text).
    html_content = source_path.read_text(encoding="utf-8")
    forms_info = analyze_forms(html_content, final_url)
    suspicious_scripts = analyze_scripts(html_content, base_url=final_url, engine=engine)

    # WHOIS / GeoIP / BEC-word enrichment on the originally submitted URL.
    enrichment = enrich_url(url)

    result = {
        "uuid": run_uuid,
        "submitted_url": url,
        "final_url": final_url,
        "redirects": redirects,
        "downloads": downloads,
        "scripts": scripts,
        "forms": forms_info,
        "suspicious_scripts": suspicious_scripts,
        "enrichment": enrichment
    }

    # Persist the full result alongside the other artifacts.
    safe_write(results_path, json.dumps(result, indent=2))
    return result
|
||||||
5
app/config/bec_words.yaml
Normal file
5
app/config/bec_words.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
words:
|
||||||
|
- "reset password"
|
||||||
|
- "open document"
|
||||||
|
- "view document"
|
||||||
|
- "verify account"
|
||||||
9
app/config/settings.yaml
Normal file
9
app/config/settings.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
app:
|
||||||
|
name: SneakyScope
|
||||||
|
version_major: 0
|
||||||
|
version_minor: 1
|
||||||
|
|
||||||
|
cache:
|
||||||
|
recent_runs_count: 10
|
||||||
|
whois_cache_days: 7
|
||||||
|
geoip_cache_days: 7
|
||||||
80
app/config/suspicious_rules.yaml
Normal file
80
app/config/suspicious_rules.yaml
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# config/suspicious_rules.yaml
|
||||||
|
# Baseline suspicious rules for SneakyScope
|
||||||
|
# Organized by category: script, form, text
|
||||||
|
# Extend these with more specific rules as needed
|
||||||
|
|
||||||
|
# --- Script Rules ---
|
||||||
|
- name: eval_usage
|
||||||
|
description: "Use of eval() in script"
|
||||||
|
category: script
|
||||||
|
type: regex
|
||||||
|
pattern: "\\beval\\("
|
||||||
|
|
||||||
|
- name: document_write
|
||||||
|
description: "Use of document.write (often abused in malicious injections)"
|
||||||
|
category: script
|
||||||
|
type: regex
|
||||||
|
pattern: "document\\.write\\("
|
||||||
|
|
||||||
|
- name: inline_event_handler
|
||||||
|
description: "Inline event handler detected (onload, onclick, etc.)"
|
||||||
|
category: script
|
||||||
|
type: regex
|
||||||
|
pattern: "on(load|click|error|mouseover|keydown)\\s*="
|
||||||
|
|
||||||
|
- name: obfuscated_encoding
|
||||||
|
description: "Suspicious use of atob() or btoa() (base64 encoding/decoding)"
|
||||||
|
category: script
|
||||||
|
type: regex
|
||||||
|
pattern: "\\b(atob|btoa)\\("
|
||||||
|
|
||||||
|
- name: suspicious_iframe
|
||||||
|
description: "Iframe usage in script (possible phishing/malvertising)"
|
||||||
|
category: script
|
||||||
|
type: regex
|
||||||
|
pattern: "<iframe[^>]*>"
|
||||||
|
|
||||||
|
# --- Form Rules ---
|
||||||
|
- name: suspicious_form_action
|
||||||
|
description: "Form action with external URL (potential credential exfiltration)"
|
||||||
|
category: form
|
||||||
|
type: regex
|
||||||
|
pattern: "<form[^>]*action=['\"]http"
|
||||||
|
|
||||||
|
- name: hidden_inputs
|
||||||
|
description: "Form with hidden inputs (possible credential harvesting)"
|
||||||
|
category: form
|
||||||
|
type: regex
|
||||||
|
pattern: "<input[^>]*type=['\"]hidden"
|
||||||
|
|
||||||
|
- name: password_field
|
||||||
|
description: "Form requesting password field"
|
||||||
|
category: form
|
||||||
|
type: regex
|
||||||
|
pattern: "<input[^>]*type=['\"]password"
|
||||||
|
|
||||||
|
# --- Text Rules (Social Engineering / BEC) ---
|
||||||
|
- name: urgent_request
|
||||||
|
description: "Language suggesting urgency (common in phishing/BEC)"
|
||||||
|
category: text
|
||||||
|
type: regex
|
||||||
|
pattern: "(urgent|immediately|asap|action required)"
|
||||||
|
|
||||||
|
- name: account_suspension
|
||||||
|
description: "Threat of account suspension/closure"
|
||||||
|
category: text
|
||||||
|
type: regex
|
||||||
|
pattern: "(account.*suspend|account.*close|verify.*account)"
|
||||||
|
|
||||||
|
- name: financial_request
|
||||||
|
description: "Request for gift cards, wire transfer, or money"
|
||||||
|
category: text
|
||||||
|
type: regex
|
||||||
|
pattern: "(gift card|wire transfer|bank account|bitcoin|payment required)"
|
||||||
|
|
||||||
|
- name: credential_reset
|
||||||
|
description: "Password reset or credential reset wording"
|
||||||
|
category: text
|
||||||
|
type: regex
|
||||||
|
pattern: "(reset password|update credentials|login to verify)"
|
||||||
|
|
||||||
137
app/enrichment.py
Normal file
137
app/enrichment.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
import requests
|
||||||
|
import yaml
|
||||||
|
import whois
|
||||||
|
from datetime import datetime
|
||||||
|
from ipaddress import ip_address
|
||||||
|
import socket
|
||||||
|
|
||||||
|
# Local imports
|
||||||
|
from .utils.cache_db import get_cache
|
||||||
|
from .utils.settings import get_settings
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
||||||
|
|
||||||
|
# Init cache (shared on-disk cache used for WHOIS and GeoIP lookups)
cache = get_cache("/data/cache.db")
settings = get_settings()

# Load BEC (business email compromise) keyword list.
# FIX: the YAML ships at app/config/bec_words.yaml and this module lives in
# app/, so the path must be relative to this file's own directory. The old
# Path(__file__).parent.parent resolved above the package, so the file was
# never found and BEC_WORDS silently stayed empty.
BEC_WORDS_FILE = Path(__file__).parent / "config" / "bec_words.yaml"
if BEC_WORDS_FILE.exists():
    with open(BEC_WORDS_FILE, "r", encoding="utf-8") as f:
        BEC_WORDS = yaml.safe_load(f).get("words", [])
else:
    BEC_WORDS = []

# Minutes per day (24 hours * 60 minutes). The *_cache_days settings are
# multiplied by this, so TTLs are expressed in minutes.
# NOTE(review): confirm cache.create() expects a TTL in minutes.
days = 24 * 60

GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * days
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * days
|
||||||
|
|
||||||
|
def enrich_url(url: str) -> dict:
    """Perform WHOIS, GeoIP, and BEC word enrichment for a URL."""
    # Extract hostname; fall back to the raw input when parsing fails.
    hostname = urlparse(url).hostname or url

    enriched = {}

    # WHOIS fields are merged directly into the top-level result.
    enriched.update(enrich_whois(hostname))

    # GeoIP data is keyed under its own entry.
    enriched["geoip"] = enrich_geoip(hostname)

    # BEC keywords: case-insensitive substring match against the full URL.
    lowered_url = url.lower()
    enriched["bec_words"] = [word for word in BEC_WORDS if word.lower() in lowered_url]

    return enriched
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_whois(hostname: str) -> dict:
    """Fetch WHOIS info using python-whois with safe type handling.

    Results are cached under "whois:<hostname>" for WHOIS_DEFAULT_TTL.
    On python-whois failure, falls back to the system `whois` binary and
    stores its raw text under "raw_whois".

    Args:
        hostname (str): Domain to look up.

    Returns:
        dict: {"whois": {...}} and optionally {"raw_whois": str}.
    """
    cache_key = f"whois:{hostname}"
    cached = cache.read(cache_key)
    if cached:
        logging.info(f"[CACHE HIT] for WHOIS: {hostname}")
        return cached

    logging.info(f"[CACHE MISS] for WHOIS: {hostname}")
    result = {}
    try:
        w = whois.whois(hostname)

        def format_dt(val):
            # Normalize python-whois values: lists -> comma-joined strings,
            # datetimes -> "YYYY-mm-dd HH:MM:SS", None -> "Possible Privacy"
            # (missing fields usually mean a privacy-protected record).
            if isinstance(val, list):
                return ", ".join([v.strftime("%Y-%m-%d %H:%M:%S") if isinstance(v, datetime) else str(v) for v in val])
            elif isinstance(val, datetime):
                return val.strftime("%Y-%m-%d %H:%M:%S")
            elif val is None:
                return "Possible Privacy"
            else:
                return str(val)

        result["whois"] = {
            "registrar": format_dt(getattr(w, "registrar", None)),
            "creation_date": format_dt(getattr(w, "creation_date", None)),
            "expiration_date": format_dt(getattr(w, "expiration_date", None)),
            "owner": format_dt(getattr(w, "org", None))
        }

    except Exception as e:
        logging.warning(f"WHOIS lookup failed for {hostname}: {e}")
        try:
            # fallback raw whois text via the system `whois` binary
            # (installed by the Dockerfile's apt-get step)
            import subprocess
            raw_output = subprocess.check_output(["whois", hostname], encoding="utf-8", errors="ignore")
            result["whois"] = {}
            result["raw_whois"] = raw_output
        except Exception as raw_e:
            logging.error(f"Raw WHOIS also failed: {raw_e}")
            result["whois"] = {}
            result["raw_whois"] = "N/A"

    # NOTE(review): failures are cached too, pinning an empty record for the
    # full TTL window — confirm that is intended.
    cache.create(cache_key, result, WHOIS_DEFAULT_TTL)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_geoip(hostname: str) -> dict:
    """Resolve hostname to IPs and fetch info from ip-api.com.

    Each IP's lookup is cached under ``geoip:<ip>``. HTTP errors and
    network failures are recorded as ``{"error": ...}`` entries rather
    than raised.
    """
    results: dict = {}
    for addr in extract_ips_from_url(hostname):
        addr_str = str(addr)
        key = f"geoip:{addr_str}"
        hit = cache.read(key)
        if hit:
            logging.info(f"[CACHE HIT] for GEOIP: {addr}")
            results[addr_str] = hit
            continue

        logging.info(f"[CACHE MISS] for GEOIP: {addr}")
        try:
            # "fields" is a bitmask selecting which attributes ip-api returns.
            resp = requests.get(f"http://ip-api.com/json/{addr_str}?fields=24313855", timeout=5)
            if resp.status_code == 200:
                results[addr_str] = resp.json()
            else:
                results[addr_str] = {"error": f"HTTP {resp.status_code}"}
        except Exception as e:
            results[addr_str] = {"error": str(e)}

        cache.create(key, results[addr_str], GEOIP_DEFAULT_TTL)

    return results
def extract_ips_from_url(hostname: str):
    """Resolve hostname to IPs.

    Despite the name, this expects a bare hostname (not a full URL) and
    returns a list of unique ipaddress objects; resolution failures yield
    an empty list instead of raising.
    """
    try:
        records = socket.getaddrinfo(hostname, None)
        return list({ip_address(entry[4][0]) for entry in records})
    except Exception:
        return []
125
app/routes.py
Normal file
125
app/routes.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
|
||||||
|
|
||||||
|
from .browser import fetch_page_artifacts
|
||||||
|
from .enrichment import enrich_url
|
||||||
|
from .utils.settings import get_settings
|
||||||
|
from .utils.io_helpers import get_recent_results
|
||||||
|
|
||||||
|
# Blueprint for all main application routes.
bp = Blueprint("main", __name__)

# Application identity (name + version string), loaded once at import time
# and injected into every template via the context processor below.
settings = get_settings()
app_name = settings.app.name
app_version = f"v {settings.app.version_major}.{settings.app.version_minor}"
# --- context processor ---
|
||||||
|
@bp.context_processor
def inject_app_info():
    """Inject app name and version into all templates."""
    return {"app_name": app_name, "app_version": app_version}
@bp.route("/", methods=["GET"])
def index():
    """
    Render the landing page with optional 'recent_results' list.

    The number of recent runs is controlled via
    settings.cache.recent_runs_count (int); falls back to 10 if the
    setting is absent or invalid.
    """
    storage_root = Path(current_app.config["SANDBOX_STORAGE"]).resolve()

    # Recent-run count from settings, clamped to >= 0, defaulting to 10.
    try:
        count = int(getattr(settings.cache, "recent_runs_count", 10))
        count = max(count, 0)
    except Exception:
        count = 10

    # Non-fatal if storage is empty or unreadable.
    recent = get_recent_results(storage_root, count, current_app.logger)

    # index.html hides the "Recent Results" card when the list is empty.
    return render_template("index.html", recent_results=recent)
@bp.route("/analyze", methods=["POST"])
def analyze():
    """Run a full analysis for the submitted URL, then redirect to its result page.

    Browser-capture failures flash an error and return to the index;
    enrichment failures are non-fatal and leave an empty enrichment dict.
    """
    url = request.form.get("url", "").strip()
    current_app.logger.info(f"[*] Analyzing {url}")
    if not url:
        flash("Please enter a URL.", "error")
        return redirect(url_for("main.index"))

    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    storage.mkdir(parents=True, exist_ok=True)

    # Capture artifacts with the headless browser; abort on failure.
    try:
        engine = current_app.config.get("RULE_ENGINE")
        result = asyncio.run(fetch_page_artifacts(url, storage, engine=engine))
        current_app.logger.info(f"[+] Analysis done for {url}")
    except Exception as e:
        flash(f"Analysis failed: {e}", "error")
        current_app.logger.error(f"Analysis failed for {url}: {e}")
        return redirect(url_for("main.index"))

    # Enrichment is best-effort: a failure must not block the result page.
    try:
        result["enrichment"] = enrich_url(url)
        current_app.logger.info(f"[+] Enrichment added for {url}")
    except Exception as e:
        result["enrichment"] = {}
        current_app.logger.warning(f"[!] Enrichment failed for {url}: {e}")

    # Redirect to the permalink page for this run.
    return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
    """Render the stored results.json for a given run, or 404 if absent."""
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    results_path = storage / run_uuid / "results.json"

    if not results_path.exists():
        current_app.logger.error(f"Results not found for UUID: {run_uuid}")
        abort(404)

    with open(results_path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)

    # Templates need the UUID to build artifact links.
    payload["uuid"] = run_uuid

    return render_template("result.html", **payload)
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
    """Serve a stored artifact (screenshot, source, ...) for a run.

    Responds 404 for unknown files and for any path that would escape the
    run directory (directory traversal).
    """
    storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
    run_dir = (storage / run_uuid).resolve()

    # Resolve BEFORE the containment check: relative_to() is purely
    # lexical, so an unresolved path containing ".." components would
    # otherwise pass the check while pointing outside run_dir.
    full_path = (run_dir / filename).resolve()

    # Prevent directory traversal
    try:
        full_path.relative_to(run_dir)
    except ValueError:
        current_app.logger.warning(f"Directory traversal attempt: {filename}")
        abort(404)

    if not full_path.exists():
        current_app.logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
        abort(404)

    return send_file(full_path)
288
app/static/style.css
Normal file
288
app/static/style.css
Normal file
@@ -0,0 +1,288 @@
|
|||||||
|
:root {
|
||||||
|
font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
margin: 0;
|
||||||
|
background: #0b0f14;
|
||||||
|
color: #e6edf3;
|
||||||
|
}
|
||||||
|
|
||||||
|
header, footer {
|
||||||
|
padding: 1rem 1.25rem;
|
||||||
|
background: #0f1720;
|
||||||
|
border-bottom: 1px solid #1f2a36;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== main: now full-width (no 960px cap) ===== */
|
||||||
|
main {
|
||||||
|
padding: 1.5rem 2rem; /* a bit more horizontal breathing room */
|
||||||
|
max-width: 100%; /* remove fixed cap */
|
||||||
|
width: 100%;
|
||||||
|
margin: 0; /* no auto centering since we’re full-width */
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
.card {
|
||||||
|
background: #111826;
|
||||||
|
padding: 1rem;
|
||||||
|
border: 1px solid #1f2a36;
|
||||||
|
border-radius: 12px;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
label {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
input[type=url] {
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.7rem;
|
||||||
|
border-radius: 8px;
|
||||||
|
border: 1px solid #243041;
|
||||||
|
background: #0b1220;
|
||||||
|
color: #e6edf3;
|
||||||
|
}
|
||||||
|
|
||||||
|
button, .button {
|
||||||
|
display: inline-block;
|
||||||
|
margin-top: 0.75rem;
|
||||||
|
padding: 0.6rem 1rem;
|
||||||
|
border-radius: 8px;
|
||||||
|
border: 1px solid #243041;
|
||||||
|
background: #1a2535;
|
||||||
|
color: #e6edf3;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.flash {
|
||||||
|
list-style: none;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.flash .error {
|
||||||
|
color: #ff6b6b;
|
||||||
|
}
|
||||||
|
|
||||||
|
.grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 150px 1fr;
|
||||||
|
gap: 0.5rem 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
img {
|
||||||
|
max-width: 100%;
|
||||||
|
height: auto;
|
||||||
|
border-radius: 8px;
|
||||||
|
border: 1px solid #243041;
|
||||||
|
}
|
||||||
|
|
||||||
|
pre.code {
|
||||||
|
white-space: pre-wrap;
|
||||||
|
word-break: break-all;
|
||||||
|
background: #0b1220;
|
||||||
|
padding: 0.75rem;
|
||||||
|
border-radius: 8px;
|
||||||
|
border: 1px solid #243041;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Links */
|
||||||
|
a {
|
||||||
|
color: #7dd3fc; /* Soft cyan for dark background */
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
a:hover {
|
||||||
|
color: #38bdf8; /* Slightly brighter on hover */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Accordion / details summary */
|
||||||
|
details summary {
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 0.5rem;
|
||||||
|
font-weight: bold;
|
||||||
|
border-radius: 8px;
|
||||||
|
background: #111826;
|
||||||
|
border: 1px solid #1f2a36;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
transition: background 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
details[open] summary {
|
||||||
|
background: #1a2535; /* Slightly lighter when expanded */
|
||||||
|
}
|
||||||
|
|
||||||
|
details > ul, details > table {
|
||||||
|
padding-left: 1rem;
|
||||||
|
margin: 0.5rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Highlight flagged forms */
|
||||||
|
details.flagged summary {
|
||||||
|
border-left: 4px solid #ff6b6b; /* Red accent for flagged forms */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Smooth collapse/expand */
|
||||||
|
details ul, details p {
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Enrichment / GeoIP / Forms / Redirects Tables */
|
||||||
|
.enrichment-table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.enrichment-table th,
|
||||||
|
.enrichment-table td {
|
||||||
|
border: 1px solid #243041;
|
||||||
|
padding: 0.5rem;
|
||||||
|
vertical-align: top;
|
||||||
|
}
|
||||||
|
|
||||||
|
.enrichment-table th {
|
||||||
|
background: #111826;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
.enrichment-table td {
|
||||||
|
width: auto; /* browser resizes naturally */
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scripts Table Special Handling */
|
||||||
|
.scripts-table pre.code {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0.25rem;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Hover effects for table rows */
|
||||||
|
.enrichment-table tbody tr:hover {
|
||||||
|
background: #1f2a36;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Card table headings */
|
||||||
|
.enrichment-table thead th {
|
||||||
|
border-bottom: 2px solid #243041;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ensure nested tables don't overflow */
|
||||||
|
.card table {
|
||||||
|
table-layout: auto;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ============================
|
||||||
|
Results Table (3+ columns)
|
||||||
|
- Visual style matches .enrichment-table
|
||||||
|
- Adds better wrapping for long strings (URL/UUID)
|
||||||
|
- Right-aligns timestamps for scannability
|
||||||
|
============================ */
|
||||||
|
|
||||||
|
.results-table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
background: #111826; /* match card background */
|
||||||
|
border: 1px solid #1f2a36; /* subtle border like cards */
|
||||||
|
border-radius: 12px; /* rounded corners */
|
||||||
|
overflow: hidden; /* clip the rounded corners */
|
||||||
|
table-layout: auto; /* allow natural column sizing */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Header styling */
|
||||||
|
.results-table thead th {
|
||||||
|
padding: 0.6rem 0.75rem;
|
||||||
|
background: #0f1720; /* match header tone */
|
||||||
|
border-bottom: 1px solid #1f2a36;
|
||||||
|
text-align: left;
|
||||||
|
font-weight: 600;
|
||||||
|
white-space: nowrap; /* keep short headers on one line */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Body cells */
|
||||||
|
.results-table tbody td {
|
||||||
|
padding: 0.6rem 0.75rem;
|
||||||
|
border-top: 1px solid #1f2a36;
|
||||||
|
vertical-align: top;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Zebra rows for readability (optional) */
|
||||||
|
.results-table tbody tr:nth-child(odd) {
|
||||||
|
background: #0d1522; /* slight contrast row */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Links inside table should inherit your global link colors */
|
||||||
|
.results-table a {
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- Column-specific tweaks ---- */
|
||||||
|
|
||||||
|
/* URL column: allow wrapping of long URLs without blowing the layout */
|
||||||
|
.results-table td.url,
|
||||||
|
.results-table td.url a {
|
||||||
|
word-wrap: break-word; /* legacy support */
|
||||||
|
overflow-wrap: anywhere; /* modern wrapping for long URLs */
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* UUID column: force wrap to avoid overflow */
|
||||||
|
.results-table td.uuid {
|
||||||
|
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||||
|
word-break: break-all; /* split at any point to keep table narrow */
|
||||||
|
max-width: 28ch; /* reasonable width to avoid stretching */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Timestamp column: align right and keep on a single line */
|
||||||
|
.results-table td.timestamp {
|
||||||
|
text-align: right;
|
||||||
|
white-space: nowrap; /* keep ISO timestamps on one line */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Optional: make the newest (first) row stand out subtly */
|
||||||
|
.results-table tbody tr:first-child {
|
||||||
|
box-shadow: inset 0 0 0 1px #243041;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Optional: small, subtle buttons in table cells (e.g., copy UUID) */
|
||||||
|
.results-table .copy-btn {
|
||||||
|
margin-left: 0.4rem;
|
||||||
|
padding: 0.2rem 0.45rem;
|
||||||
|
border-radius: 6px;
|
||||||
|
border: 1px solid #243041;
|
||||||
|
background: #1a2535;
|
||||||
|
color: #e6edf3;
|
||||||
|
cursor: pointer;
|
||||||
|
line-height: 1;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.results-table .copy-btn:hover {
|
||||||
|
filter: brightness(1.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== Responsive niceties for very small screens ===== */
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
main {
|
||||||
|
padding: 1rem; /* a tad tighter on mobile */
|
||||||
|
}
|
||||||
|
|
||||||
|
.enrichment-table,
|
||||||
|
.results-table {
|
||||||
|
display: block;
|
||||||
|
overflow-x: auto; /* allow horizontal scroll if needed */
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.scripts-table td ul {
|
||||||
|
margin: 0.25rem 0 0.25rem 1rem;
|
||||||
|
padding-left: 1rem;
|
||||||
|
}
|
||||||
|
.scripts-table td small {
|
||||||
|
opacity: 0.85;
|
||||||
|
}
|
||||||
33
app/templates/base.html
Normal file
33
app/templates/base.html
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<title>{{ app_name }} {{ app_version }}</title>
|
||||||
|
<link rel="stylesheet" href="https://unpkg.com/sanitize.css" />
|
||||||
|
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>{{ app_name }} {{ app_version }}</h1>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
{% with messages = get_flashed_messages(with_categories=true) %}
|
||||||
|
{% if messages %}
|
||||||
|
<ul class="flash">
|
||||||
|
{% for category, message in messages %}
|
||||||
|
<li class="{{ category }}">{{ message }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endif %}
|
||||||
|
{% endwith %}
|
||||||
|
|
||||||
|
<main>
|
||||||
|
{% block content %}{% endblock %}
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<footer>
|
||||||
|
<small>{{ app_name }} - A self-hosted URL analysis sandbox - {{ app_version }}</small>
|
||||||
|
</footer>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
149
app/templates/index.html
Normal file
149
app/templates/index.html
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
{% block content %}
|
||||||
|
|
||||||
|
<!-- Analysis Form -->
|
||||||
|
<form id="analyze-form" method="post" action="{{ url_for('main.analyze') }}" class="card">
|
||||||
|
<h2>Analyze a URL</h2>
|
||||||
|
<label for="url">Enter a URL to analyze</label>
|
||||||
|
<input id="url" name="url" type="url" placeholder="https://example.com" required />
|
||||||
|
<button type="submit">Analyze</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<!-- Recent Results (optional; shown only if recent_results provided) -->
|
||||||
|
{% if recent_results %}
|
||||||
|
<div class="card" id="recent-results">
|
||||||
|
<h2>Recent Results</h2>
|
||||||
|
<table class="results-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Timestamp</th>
|
||||||
|
<th>URL</th>
|
||||||
|
<th>UUID</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for r in recent_results %}
|
||||||
|
<tr>
|
||||||
|
<td class="timestamp">
|
||||||
|
{% if r.timestamp %}
|
||||||
|
{{ r.timestamp }}
|
||||||
|
{% else %}
|
||||||
|
N/A
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td class="url">
|
||||||
|
<a href="{{ url_for('main.view_result', run_uuid=r.uuid) }}">
|
||||||
|
{{ r.final_url or r.submitted_url }}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td class="uuid">
|
||||||
|
<code id="uuid-{{ loop.index }}">{{ r.uuid }}</code>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="copy-btn"
|
||||||
|
data-target="uuid-{{ loop.index }}">
|
||||||
|
📋
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<!-- Spinner Modal -->
|
||||||
|
<div id="spinner-modal" style="
|
||||||
|
display:none;
|
||||||
|
opacity:0;
|
||||||
|
position:fixed;
|
||||||
|
top:0;
|
||||||
|
left:0;
|
||||||
|
width:100%;
|
||||||
|
height:100%;
|
||||||
|
background:rgba(0,0,0,0.7);
|
||||||
|
color:#fff;
|
||||||
|
font-size:1.5rem;
|
||||||
|
text-align:center;
|
||||||
|
padding-top:20%;
|
||||||
|
z-index:9999;
|
||||||
|
transition: opacity 0.3s ease;
|
||||||
|
">
|
||||||
|
<div>
|
||||||
|
<div class="loader" style="
|
||||||
|
border: 8px solid #f3f3f3;
|
||||||
|
border-top: 8px solid #1a2535;
|
||||||
|
border-radius: 50%;
|
||||||
|
width: 60px;
|
||||||
|
height: 60px;
|
||||||
|
animation: spin 1s linear infinite;
|
||||||
|
margin: 0 auto 1rem auto;
|
||||||
|
"></div>
|
||||||
|
Analyzing website…
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
@keyframes spin {
|
||||||
|
0% { transform: rotate(0deg); }
|
||||||
|
100% { transform: rotate(360deg); }
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const form = document.getElementById('analyze-form');
|
||||||
|
const modal = document.getElementById('spinner-modal');
|
||||||
|
|
||||||
|
function showModal() {
|
||||||
|
modal.style.display = 'block';
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
modal.style.opacity = '1';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function hideModal() {
|
||||||
|
modal.style.opacity = '0';
|
||||||
|
modal.addEventListener('transitionend', () => {
|
||||||
|
modal.style.display = 'none';
|
||||||
|
}, { once: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hide spinner on initial load / back navigation
|
||||||
|
window.addEventListener('pageshow', () => {
|
||||||
|
modal.style.opacity = '0';
|
||||||
|
modal.style.display = 'none';
|
||||||
|
});
|
||||||
|
|
||||||
|
form.addEventListener('submit', (e) => {
|
||||||
|
showModal();
|
||||||
|
// Prevent double submission
|
||||||
|
form.querySelector('button').disabled = true;
|
||||||
|
|
||||||
|
// Allow browser to render the modal before submitting
|
||||||
|
requestAnimationFrame(() => form.submit());
|
||||||
|
e.preventDefault();
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
|
const buttons = document.querySelectorAll('.copy-btn');
|
||||||
|
buttons.forEach(btn => {
|
||||||
|
btn.addEventListener('click', () => {
|
||||||
|
const targetId = btn.getAttribute('data-target');
|
||||||
|
const uuidText = document.getElementById(targetId).innerText;
|
||||||
|
|
||||||
|
navigator.clipboard.writeText(uuidText).then(() => {
|
||||||
|
// Give quick feedback
|
||||||
|
btn.textContent = '✅';
|
||||||
|
setTimeout(() => { btn.textContent = '📋'; }, 1500);
|
||||||
|
}).catch(err => {
|
||||||
|
console.error('Failed to copy UUID:', err);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
268
app/templates/result.html
Normal file
268
app/templates/result.html
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% block content %}
|
||||||
|
|
||||||
|
<!-- Top Jump List -->
|
||||||
|
<div class="card" id="top-jump-list">
|
||||||
|
<h2>Jump to Section</h2>
|
||||||
|
<ul>
|
||||||
|
<li><a href="/">Analyse Another Page</a></li>
|
||||||
|
<li><a href="#url-overview">URL Overview</a></li>
|
||||||
|
<li><a href="#enrichment">Enrichment</a></li>
|
||||||
|
<li><a href="#redirects">Redirects</a></li>
|
||||||
|
<li><a href="#forms">Forms</a></li>
|
||||||
|
<li><a href="#scripts">Suspicious Scripts</a></li>
|
||||||
|
<li><a href="#screenshot">Screenshot</a></li>
|
||||||
|
<li><a href="#source">Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- URL Overview -->
|
||||||
|
<div class="card" id="url-overview">
|
||||||
|
<h2>URL Overview</h2>
|
||||||
|
<p><strong>Submitted URL:</strong> {{ submitted_url }}</p>
|
||||||
|
<p><strong>Final URL:</strong> <a href="{{ final_url }}" target="_blank">{{ final_url }}</a></p>
|
||||||
|
<p><strong>Permalink:</strong>
|
||||||
|
<a href="{{ url_for('main.view_result', run_uuid=uuid, _external=True) }}">
|
||||||
|
{{ request.host_url }}results/{{ uuid }}
|
||||||
|
</a>
|
||||||
|
</p>
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Enrichment -->
|
||||||
|
<div class="card" id="enrichment">
|
||||||
|
<h2>Enrichment</h2>
|
||||||
|
|
||||||
|
<!-- WHOIS -->
|
||||||
|
{% if enrichment.whois %}
|
||||||
|
<h3>WHOIS</h3>
|
||||||
|
<table class="enrichment-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Field</th>
|
||||||
|
<th>Value</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for k, v in enrichment.whois.items() %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ k.replace('_', ' ').title() }}</td>
|
||||||
|
<td>{{ v }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if enrichment.raw_whois %}
|
||||||
|
<h3>Raw WHOIS</h3>
|
||||||
|
<pre class="code">{{ enrichment.raw_whois }}</pre>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<!-- GeoIP / IP-API -->
|
||||||
|
{% if enrichment.geoip %}
|
||||||
|
<h3>GeoIP</h3>
|
||||||
|
{% for ip, info in enrichment.geoip.items() %}
|
||||||
|
<details class="card" style="padding:0.5rem; margin-bottom:0.5rem;">
|
||||||
|
<summary>{{ ip }}</summary>
|
||||||
|
<table class="enrichment-table">
|
||||||
|
<tbody>
|
||||||
|
{% for key, val in info.items() %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ key.replace('_', ' ').title() }}</td>
|
||||||
|
<td>{{ val }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</details>
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<!-- BEC Words -->
|
||||||
|
{% if enrichment.bec_words %}
|
||||||
|
<h3>BEC Words Detected</h3>
|
||||||
|
<table class="enrichment-table">
|
||||||
|
<thead>
|
||||||
|
<tr><th>Word</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for word in enrichment.bec_words %}
|
||||||
|
<tr><td>{{ word }}</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not enrichment.whois and not enrichment.raw_whois and not enrichment.geoip and not enrichment.bec_words %}
|
||||||
|
<p>No enrichment data available.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Redirects -->
|
||||||
|
<div class="card" id="redirects">
|
||||||
|
<h2>Redirects</h2>
|
||||||
|
{% if redirects %}
|
||||||
|
<table class="enrichment-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>URL</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for r in redirects %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ r.status }}</td>
|
||||||
|
<td><a href="{{ r.url }}" target="_blank">{{ r.url }}</a></td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<p>No redirects detected.</p>
|
||||||
|
{% endif %}
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Forms -->
|
||||||
|
<div class="card" id="forms">
|
||||||
|
<h2>Forms</h2>
|
||||||
|
{% if forms %}
|
||||||
|
{% for form in forms %}
|
||||||
|
<details class="card {% if form.flagged %}flagged{% endif %}" style="padding:0.5rem; margin-bottom:0.5rem;">
|
||||||
|
<summary>{{ form.status }} — Action: {{ form.action }} ({{ form.method | upper }})</summary>
|
||||||
|
<table class="enrichment-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Input Name</th>
|
||||||
|
<th>Type</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for inp in form.inputs %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ inp.name }}</td>
|
||||||
|
<td>{{ inp.type }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% if form.flagged %}
|
||||||
|
<p><strong>Flag Reasons:</strong></p>
|
||||||
|
<ul>
|
||||||
|
{% for reason in form.flag_reasons %}
|
||||||
|
<li>{{ reason }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endif %}
|
||||||
|
</details>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<p>No forms detected.</p>
|
||||||
|
{% endif %}
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Suspicious Scripts -->
|
||||||
|
<div class="card" id="scripts">
|
||||||
|
<h2>Suspicious Scripts</h2>
|
||||||
|
|
||||||
|
{% if suspicious_scripts %}
|
||||||
|
<table class="enrichment-table scripts-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Source URL</th>
|
||||||
|
<th>Content Snippet</th>
|
||||||
|
<th>Matches (Rules & Heuristics)</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for s in suspicious_scripts %}
|
||||||
|
<tr>
|
||||||
|
<!-- Type -->
|
||||||
|
<td>{{ s.type or 'unknown' }}</td>
|
||||||
|
|
||||||
|
<!-- Source URL -->
|
||||||
|
<td>
|
||||||
|
{% if s.src %}
|
||||||
|
<a href="{{ s.src }}" target="_blank">{{ s.src }}</a>
|
||||||
|
{% else %}
|
||||||
|
N/A
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<!-- Inline content snippet (collapsible) -->
|
||||||
|
<td>
|
||||||
|
{% if s.content_snippet %}
|
||||||
|
<details>
|
||||||
|
<summary>View snippet</summary>
|
||||||
|
<pre class="code">{{ s.content_snippet }}</pre>
|
||||||
|
</details>
|
||||||
|
{% else %}
|
||||||
|
N/A
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<!-- Rules & Heuristics -->
|
||||||
|
<td>
|
||||||
|
{% set has_rules = s.rules and s.rules|length > 0 %}
|
||||||
|
{% set has_heur = s.heuristics and s.heuristics|length > 0 %}
|
||||||
|
|
||||||
|
{% if has_rules %}
|
||||||
|
<strong>Rules</strong>
|
||||||
|
<ul>
|
||||||
|
{% for r in s.rules %}
|
||||||
|
<li title="{{ r.description or '' }}">
|
||||||
|
{{ r.name }}
|
||||||
|
{% if r.description %}
|
||||||
|
<small>— {{ r.description }}</small>
|
||||||
|
{% endif %}
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if has_heur %}
|
||||||
|
<strong>Heuristics</strong>
|
||||||
|
<ul>
|
||||||
|
{% for h in s.heuristics %}
|
||||||
|
<li>{{ h }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if not has_rules and not has_heur %}
|
||||||
|
N/A
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<p>No suspicious scripts detected.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Screenshot -->
|
||||||
|
<div class="card" id="screenshot">
|
||||||
|
<h2>Screenshot</h2>
|
||||||
|
<img src="{{ url_for('main.artifacts', run_uuid=uuid, filename='screenshot.png') }}" alt="Screenshot">
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Source -->
|
||||||
|
<div class="card" id="source">
|
||||||
|
<h2>Source</h2>
|
||||||
|
<p><a href="{{ url_for('main.artifacts', run_uuid=uuid, filename='source.txt') }}" target="_blank">View Source</a></p>
|
||||||
|
<p><a href="#top-jump-list">Back to top</a></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
128
app/utils/cache_db.py
Normal file
128
app/utils/cache_db.py
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
import json
|
||||||
|
import time
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
import functools
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- SINGLETON DECORATOR ----------
|
||||||
|
T = Any
|
||||||
|
|
||||||
|
def singleton_loader(func):
    """Ensure only one cache instance exists.

    Decorator: the first call runs *func* and memoises its return value
    (keyed by the function's name); every later call returns that same
    object. Creation is serialised with a lock so it is race-free.
    """
    instances: dict[str, T] = {}
    guard = threading.Lock()

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        with guard:
            if func.__name__ not in instances:
                instances[func.__name__] = func(*args, **kwargs)
            return instances[func.__name__]

    return wrapper
# ---------- CACHE CLASS ----------
|
||||||
|
class CacheDB:
|
||||||
|
"""SQLite-backed cache with expiration in minutes, CRUD, auto-cleanup, singleton support."""
|
||||||
|
|
||||||
|
TABLE_NAME = "cache"
|
||||||
|
|
||||||
|
def __init__(self, db_path: str | Path = "cache.db", default_expiration_minutes: int = 1440):
|
||||||
|
"""
|
||||||
|
:param default_expiration_minutes: default expiration in minutes (default 24 hours)
|
||||||
|
"""
|
||||||
|
self.db_path = Path(db_path)
|
||||||
|
self.default_expiration = default_expiration_minutes * 60 # convert minutes -> seconds
|
||||||
|
|
||||||
|
self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
|
||||||
|
self.conn.row_factory = sqlite3.Row
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._create_table()
|
||||||
|
|
||||||
|
def _create_table(self):
|
||||||
|
"""Create the cache table if it doesn't exist."""
|
||||||
|
with self._lock:
|
||||||
|
self.conn.execute(f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS {self.TABLE_NAME} (
|
||||||
|
key TEXT PRIMARY KEY,
|
||||||
|
value TEXT,
|
||||||
|
expires_at INTEGER
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
def _cleanup_expired(self):
|
||||||
|
"""Delete expired rows."""
|
||||||
|
now = int(time.time())
|
||||||
|
with self._lock:
|
||||||
|
self.conn.execute(
|
||||||
|
f"DELETE FROM {self.TABLE_NAME} WHERE expires_at IS NOT NULL AND expires_at < ?", (now,)
|
||||||
|
)
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
# ---------- CRUD ----------
|
||||||
|
def create(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
|
||||||
|
"""Insert or update a cache entry. expires_in_minutes overrides default expiration."""
|
||||||
|
self._cleanup_expired()
|
||||||
|
if expires_in_minutes is None:
|
||||||
|
expires_in_seconds = self.default_expiration
|
||||||
|
else:
|
||||||
|
expires_in_seconds = expires_in_minutes * 60
|
||||||
|
expires_at = int(time.time()) + expires_in_seconds
|
||||||
|
|
||||||
|
value_json = json.dumps(value)
|
||||||
|
with self._lock:
|
||||||
|
self.conn.execute(
|
||||||
|
f"INSERT OR REPLACE INTO {self.TABLE_NAME} (key, value, expires_at) VALUES (?, ?, ?)",
|
||||||
|
(key, value_json, expires_at)
|
||||||
|
)
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
def read(self, key: str) -> Optional[Any]:
|
||||||
|
"""Read a cache entry. Auto-cleans expired items."""
|
||||||
|
self._cleanup_expired()
|
||||||
|
with self._lock:
|
||||||
|
row = self.conn.execute(
|
||||||
|
f"SELECT * FROM {self.TABLE_NAME} WHERE key = ?", (key,)
|
||||||
|
).fetchone()
|
||||||
|
if not row:
|
||||||
|
return None
|
||||||
|
return json.loads(row["value"])
|
||||||
|
|
||||||
|
def update(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
|
||||||
|
"""Update a cache entry. Optional expiration in minutes."""
|
||||||
|
if expires_in_minutes is None:
|
||||||
|
expires_in_seconds = self.default_expiration
|
||||||
|
else:
|
||||||
|
expires_in_seconds = expires_in_minutes * 60
|
||||||
|
expires_at = int(time.time()) + expires_in_seconds
|
||||||
|
|
||||||
|
value_json = json.dumps(value)
|
||||||
|
with self._lock:
|
||||||
|
self.conn.execute(
|
||||||
|
f"UPDATE {self.TABLE_NAME} SET value = ?, expires_at = ? WHERE key = ?",
|
||||||
|
(value_json, expires_at, key)
|
||||||
|
)
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
def delete(self, key: str):
|
||||||
|
with self._lock:
|
||||||
|
self.conn.execute(f"DELETE FROM {self.TABLE_NAME} WHERE key = ?", (key,))
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
def clear(self):
|
||||||
|
"""Delete all rows from the cache table."""
|
||||||
|
with self._lock:
|
||||||
|
self.conn.execute(f"DELETE FROM {self.TABLE_NAME}")
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self.conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- SINGLETON INSTANCE ----------
@singleton_loader
def get_cache(db_path: str = "cache.db", default_expiration_minutes: int = 1440) -> CacheDB:
    """Return the process-wide CacheDB instance.

    Because of @singleton_loader, the arguments are honoured only on the
    first call; subsequent calls return the already-created instance and
    silently ignore any arguments passed.
    """
    return CacheDB(db_path=db_path, default_expiration_minutes=default_expiration_minutes)
|
||||||
115
app/utils/io_helpers.py
Normal file
115
app/utils/io_helpers.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# NOTE(review): this configures the ROOT logger at import time of a utility
# module; libraries conventionally leave root-logger configuration to the
# application entry point. Confirm this is intentional.
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
||||||
|
|
||||||
|
def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
|
||||||
|
"""Write content to a file safely with logging."""
|
||||||
|
path = Path(path)
|
||||||
|
try:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(path, mode, encoding=encoding) as f:
|
||||||
|
f.write(content)
|
||||||
|
logging.info(f"[+] Wrote file: {path}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"[!] Failed writing {path}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
    """
    Scan the SANDBOX_STORAGE directory for run folders (UUIDs), read each
    run's results.json, and return the most recent N entries by file mtime.

    Args:
        storage_dir (Path): Base path where UUID run directories live.
        limit (int): Maximum number of recent items to return (values <= 0
            yield an empty list).
        logger: Flask or stdlib logger to record non-fatal issues (may be None).

    Returns:
        list[dict]: Each item includes:
            {
                "uuid": str,
                "submitted_url": str | None,
                "final_url": str | None,
                "timestamp": str (ISO 8601),
            }
        Returns an empty list if no runs are found or on error.
    """
    items: list[tuple[float, dict]] = []

    try:
        # Ensure the storage dir exists so a fresh deployment doesn't error.
        storage_dir.mkdir(parents=True, exist_ok=True)

        for entry in storage_dir.iterdir():
            try:
                if not entry.is_dir():
                    # Runs live in per-UUID directories only.
                    continue

                results_path = entry / "results.json"
                if not results_path.exists():
                    # Folder has no completed run yet.
                    continue

                # mtime drives both the sort order and the displayed timestamp.
                mtime_epoch = results_path.stat().st_mtime
                mtime_iso = datetime.fromtimestamp(mtime_epoch).isoformat(timespec="seconds")

                submitted_url = None
                final_url = None
                try:
                    with results_path.open("r", encoding="utf-8") as f:
                        data = json.load(f)
                    if isinstance(data, dict):
                        submitted_url = data.get("submitted_url")
                        final_url = data.get("final_url")
                except Exception as read_err:
                    # Malformed/unreadable JSON: keep the entry, minus the URLs.
                    if logger:
                        logger.warning(f"[recent] Failed reading {results_path}: {read_err}")

                items.append(
                    (
                        mtime_epoch,
                        {
                            "uuid": entry.name,
                            "submitted_url": submitted_url,
                            "final_url": final_url,
                            "timestamp": mtime_iso,
                        },
                    )
                )
            except Exception as inner_err:
                # A single bad folder must not break the whole listing.
                if logger:
                    logger.warning(f"[recent] Skipping {entry}: {inner_err}")

        # Newest first. Slicing replaces the previous manual count-and-append
        # trim loop; max(limit, 0) preserves "empty for non-positive limit".
        items.sort(key=lambda t: t[0], reverse=True)
        return [item for _, item in items[:max(limit, 0)]]

    except Exception as outer_err:
        if logger:
            logger.error(f"[recent] Unexpected error while scanning {storage_dir}: {outer_err}")
        return []
|
||||||
132
app/utils/rules_engine.py
Normal file
132
app/utils/rules_engine.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
"""
|
||||||
|
rules_engine.py
|
||||||
|
|
||||||
|
A flexible rule-based engine for detecting suspicious patterns in scripts, forms,
|
||||||
|
or other web artifacts inside SneakyScope.
|
||||||
|
|
||||||
|
Each rule is defined as:
|
||||||
|
- name: str # Rule identifier
|
||||||
|
- description: str # Human-readable reason for analysts
|
||||||
|
- category: str # e.g., 'script', 'form', 'text', 'generic'
|
||||||
|
- type: str # 'regex' or 'function'
|
||||||
|
- pattern: str # Regex pattern (if type=regex)
|
||||||
|
- function: callable # Python function returning (bool, str) (if type=function)
|
||||||
|
|
||||||
|
The framework returns a list of results, with pass/fail and reasoning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable, Dict, List, Tuple, Union
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
class Rule:
    """Represents a single detection rule.

    A rule is either a case-insensitive regex (`rule_type="regex"`) or a
    callable returning ``(matched, reason)`` (`rule_type="function"`).
    """

    def __init__(
        self,
        name: str,
        description: str,
        category: str,
        rule_type: str = "regex",
        pattern: str = None,
        function: Callable = None,
    ):
        self.name = name
        self.description = description
        self.category = category
        self.rule_type = rule_type
        self.pattern = pattern
        self.function = function

    def run(self, text: str) -> Tuple[bool, str]:
        """
        Run the rule on given text.

        Returns:
            (matched: bool, reason: str)
        """
        if self.rule_type == "function" and callable(self.function):
            return self.function(text)
        if self.rule_type == "regex" and self.pattern:
            if re.search(self.pattern, text, re.IGNORECASE) is not None:
                return True, f"Matched regex '{self.pattern}' → {self.description}"
            return False, "No match"
        # Neither a usable regex nor a usable callable was supplied.
        return False, "Invalid rule configuration"
|
||||||
|
|
||||||
|
|
||||||
|
class RuleEngine:
    """Loads and executes rules against provided text."""

    def __init__(self, rules: List[Rule] = None):
        self.rules = rules or []

    def add_rule(self, rule: Rule):
        """Add a new rule at runtime."""
        self.rules.append(rule)

    def run_all(self, text: str, category: str = None) -> List[Dict]:
        """
        Run all rules against text.

        Args:
            text: the content to test.
            category: optional; when truthy, only rules in this category run.

        Returns:
            List of dicts with rule results. ``reason`` is None when a rule
            did not match.
        """
        report: List[Dict] = []
        for candidate in self.rules:
            if category and candidate.category != category:
                continue

            hit, why = candidate.run(text)
            entry = {
                "rule": candidate.name,
                "category": candidate.category,
                "matched": hit,
                "reason": why if hit else None,
            }
            report.append(entry)
        return report
|
||||||
|
|
||||||
|
|
||||||
|
def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
    """
    Load rules from a YAML file.

    An empty YAML document yields an empty rule list: yaml.safe_load returns
    None for empty files, which previously crashed the iteration below.

    Example YAML format:
        - name: suspicious_eval
          description: "Use of eval() in script"
          category: script
          type: regex
          pattern: "\\beval\\("

        - name: password_reset
          description: "Password reset wording"
          category: text
          type: regex
          pattern: "reset password"
    """
    rules: List[Rule] = []
    with open(yaml_file, "r", encoding="utf-8") as f:
        # "or []" guards against empty files (safe_load -> None).
        data = yaml.safe_load(f) or []

    for item in data:
        # name/description/category are mandatory; type defaults to regex.
        rule = Rule(
            name=item["name"],
            description=item["description"],
            category=item["category"],
            rule_type=item.get("type", "regex"),
            pattern=item.get("pattern"),
        )
        rules.append(rule)

    return rules
|
||||||
144
app/utils/settings.py
Normal file
144
app/utils/settings.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
#
|
||||||
|
# Note the settings file is hardcoded in this class at the top after imports.
|
||||||
|
#
|
||||||
|
# To make a new settings section, just add the setting dict to your yaml
|
||||||
|
# and then define the data class below in the config data classes area.
|
||||||
|
#
|
||||||
|
# Example use from anywhere - this will always return the same singleton
|
||||||
|
# from settings import get_settings
|
||||||
|
# def main():
|
||||||
|
# settings = get_settings()
|
||||||
|
# print(settings.database.host) # Autocomplete works
|
||||||
|
# print(settings.logging.level)
|
||||||
|
|
||||||
|
# if __name__ == "__main__":
|
||||||
|
# main()
|
||||||
|
|
||||||
|
import functools
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
from dataclasses import dataclass, fields, is_dataclass, field, MISSING
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
logger = logging.getLogger(__file__)
|
||||||
|
|
||||||
|
# Fail fast with a clear message if PyYAML is missing, since settings
# loading is impossible without it.
try:
    import yaml
except ModuleNotFoundError:
    msg = (
        "Required modules are not installed. "
        "Can not continue with module / application loading.\n"
        "Install it with: pip install -r requirements"
    )
    print(msg, file=sys.stderr)
    logger.error(msg)
    # NOTE(review): the message says "requirements" but the project file is
    # "requirements.txt". Also, exit() is the site-injected builtin and exits
    # with status 0; sys.exit(1) would signal failure to callers -- confirm.
    exit()
|
||||||
|
|
||||||
|
# This file lives at app/utils/settings.py, so parent.parent resolves to app/.
BASE_DIR = Path(__file__).resolve().parent.parent
# Hardcoded default config location: app/config/settings.yaml.
DEFAULT_SETTINGS_FILE = BASE_DIR / "config" / "settings.yaml"
|
||||||
|
|
||||||
|
# ---------- CONFIG DATA CLASSES ----------
|
||||||
|
@dataclass
class Cache_Config:
    """Cache tunables, loaded from the 'cache' section of settings.yaml.

    NOTE(review): the name deviates from PEP 8 PascalCase (CacheConfig), but
    renaming would break any caller referencing it; left as-is.
    """

    # Days before a cached WHOIS result is considered stale.
    whois_cache_days: int = 7
    # Days before a cached GeoIP result is considered stale.
    geoip_cache_days: int = 7
    # How many recent runs to show (see get_recent_results in io_helpers).
    recent_runs_count: int = 10
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AppConfig:
    """Application identity, loaded from the 'app' section of settings.yaml."""

    # Display name of the application.
    name: str = "MyApp"
    # Semantic version components (major.minor).
    version_major: int = 1
    version_minor: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Settings:
    """Top-level application settings, composed of per-section dataclasses."""

    cache: Cache_Config = field(default_factory=Cache_Config)
    app: AppConfig = field(default_factory=AppConfig)

    @classmethod
    def from_yaml(cls, path: str | Path) -> "Settings":
        """Load settings from YAML file into a Settings object.

        A missing file, an empty file, a non-mapping YAML root, or missing
        sections all fall back to the dataclass defaults.
        (Fix: the docstring was previously a no-op string inside the try
        block, and a non-dict YAML root crashed raw.get().)
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                raw: dict[str, Any] = yaml.safe_load(f) or {}
        except FileNotFoundError:
            logger.warning(f"Settings file {path} not found! Using default settings.")
            raw = {}

        # A scalar or list at the YAML root would break raw.get below.
        if not isinstance(raw, dict):
            logger.warning(f"Settings file {path} did not contain a mapping; using defaults.")
            raw = {}

        init_kwargs = {}
        for f_def in fields(cls):
            yaml_value = raw.get(f_def.name, None)

            # Determine default value from default_factory or default.
            if f_def.default_factory is not MISSING:
                default_value = f_def.default_factory()
            elif f_def.default is not MISSING:
                default_value = f_def.default
            else:
                default_value = None

            # Nested dataclass sections: merge YAML keys over the defaults.
            # (f_def.type is the actual class here -- this module does not
            # use `from __future__ import annotations`.)
            if is_dataclass(f_def.type):
                if isinstance(yaml_value, dict):
                    merged_data = {fld.name: getattr(default_value, fld.name) for fld in fields(f_def.type)}
                    merged_data.update(yaml_value)
                    init_kwargs[f_def.name] = f_def.type(**merged_data)
                else:
                    init_kwargs[f_def.name] = default_value
            else:
                init_kwargs[f_def.name] = yaml_value if yaml_value is not None else default_value

        return cls(**init_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- SINGLETON DECORATOR ----------
# NOTE(review): an identical singleton_loader (and TypeVar T) is defined
# again immediately below and shadows this one; one of the two copies
# should be removed.
T = TypeVar("T")


def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
    """Ensure the function only runs once, returning the cached value.

    The cache is keyed by the function's name only; arguments passed on
    later calls are ignored. Not lock-protected (unlike the cache_db
    variant), so first-call races are possible under threads.
    """
    cache: dict[str, T] = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        if func.__name__ not in cache:
            cache[func.__name__] = func(*args, **kwargs)
        return cache[func.__name__]

    return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- SINGLETON DECORATOR ----------
T = TypeVar("T")


def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
    """Decorator to ensure the settings are loaded only once.

    The first call's result is memoized under the function's name; every
    subsequent call returns that cached value, ignoring its arguments.
    """
    _results: dict[str, T] = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        key = func.__name__
        if key in _results:
            return _results[key]
        _results[key] = func(*args, **kwargs)
        return _results[key]

    return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
@singleton_loader
def get_settings(config_path: str | Path | None = None) -> Settings:
    """
    Returns the singleton Settings instance.

    Args:
        config_path: Optional path to the YAML config file. If not provided,
        defaults to 'config/settings.yaml' in the current working directory.
    """
    resolved = DEFAULT_SETTINGS_FILE if config_path is None else Path(config_path)
    return Settings.from_yaml(resolved)
|
||||||
10
app/wsgi.py
Normal file
10
app/wsgi.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
"""
|
||||||
|
app/wsgi.py
|
||||||
|
|
||||||
|
Gunicorn entrypoint for SneakyScope.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from . import create_app
|
||||||
|
|
||||||
|
# Gunicorn will look for "app"
|
||||||
|
app = create_app()
|
||||||
13
docker-compose.yaml
Normal file
13
docker-compose.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
services:
|
||||||
|
web:
|
||||||
|
build: .
|
||||||
|
container_name: url-sandbox-web
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
volumes:
|
||||||
|
- ./data:/data
|
||||||
|
security_opt:
|
||||||
|
- no-new-privileges:true
|
||||||
|
restart: unless-stopped
|
||||||
71
docs/roadmap.md
Normal file
71
docs/roadmap.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
|
||||||
|
## Priority 1 – Core Functionality / Stability
|
||||||
|
|
||||||
|
**Permissions / Storage Paths**
|
||||||
|
|
||||||
|
* ✅ `/data` and other mounted volumes setup handled by `sandbox.sh`
|
||||||
|
* ✅ Downloads, screenshots, and HTML artifacts are written correctly (`safe_write` in `io_helpers.py`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Priority 2 – Data Accuracy / Enrichment
|
||||||
|
|
||||||
|
**WHOIS & GeoIP Enhancements**
|
||||||
|
|
||||||
|
* ✅ Implemented Python-based WHOIS parsing with fallback to raw WHOIS text
|
||||||
|
* ✅ Default `"Possible Privacy"` or `"N/A"` for missing WHOIS fields
|
||||||
|
* ✅ GeoIP + ASN + ISP info displayed per IP in **accordion tables**
|
||||||
|
* ✅ Cache WHOIS and GeoIP results to reduce repeated queries
|
||||||
|
|
||||||
|
**Suspicious Scripts & Forms**
|
||||||
|
|
||||||
|
* [ ] Expand flagged script and form output with reasons for analysts
|
||||||
|
* [ ] Show each check and if it triggered flags (pass/fail for each check)
|
||||||
|
|
||||||
|
**Add Suspicious BEC words**
|
||||||
|
|
||||||
|
* ✅ Look for things like `"reset password"`
|
||||||
|
* ✅ Make configurable via a config file (yaml doc with rules)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Priority 3 – User Interface / UX
|
||||||
|
|
||||||
|
**Front Page / Input Handling**
|
||||||
|
|
||||||
|
* [ ] Automatically prepend `http://`, `https://`, and/or `www.` if a user only enters a domain
|
||||||
|
|
||||||
|
**Result Templates / Cards**
|
||||||
|
* [ ] Load source code for the webpage in a code editor view or code block on the page so that it's easier to read
|
||||||
|
* [ ] Update result cards with clear, analyst-friendly explanations
|
||||||
|
* [ ] Include flagged logic and reason lists for scripts and forms
|
||||||
|
* ✅ Display GeoIP results in accordion tables
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Priority 4 – API Layer
|
||||||
|
|
||||||
|
**API Endpoints**
|
||||||
|
|
||||||
|
* [ ] Add `/screenshot` endpoint
|
||||||
|
* [ ] Add `/source` endpoint
|
||||||
|
* [ ] Add `/analyse` endpoint
|
||||||
|
|
||||||
|
**OpenAPI + Docs**
|
||||||
|
|
||||||
|
* [ ] Create initial `openapi/openapi.yaml` spec file
|
||||||
|
* [ ] Serve spec at `/api/openapi.yaml`
|
||||||
|
* [ ] Wire up Swagger UI or Redoc at `/docs` for interactive API exploration
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Priority 5 – Optional / Cleanup
|
||||||
|
|
||||||
|
**Artifact Management**
|
||||||
|
* [ ] Implement saving of results from a UUID as "results.json" so we don't rerun all the rules and just load from cache.
|
||||||
|
* [ ] Implement cleanup or retention policy for old artifacts
|
||||||
|
* [ ] Optional: Add periodic maintenance scripts for storage
|
||||||
|
|
||||||
|
**Extra Features**
|
||||||
|
|
||||||
|
* [ ] Placeholder for additional features (e.g., bulk URL analysis, alerting, integrations)
|
||||||
22
entrypoint.sh
Normal file
22
entrypoint.sh
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env bash
# Container entrypoint: verify Playwright is importable, then exec gunicorn.
set -euo pipefail

# Sanity check: fail fast if the playwright package is missing from the image.
# (The base image ships with browsers preinstalled, so no install step is
# needed here; the previous heredoc imported unused names and referenced a
# nonexistent install step.)
python - <<'PY'
import playwright  # presence check only
PY

# Run the app via gunicorn
# graceful-timeout - 300 ensures long page loads aren’t killed prematurely
# threads - 8 gives us more threads to work with
# gthread allows each worker to handle multiple threads, so async/blocking tasks like Playwright won’t block the whole worker
exec gunicorn \
    --bind 0.0.0.0:8000 \
    --workers 2 \
    --threads 8 \
    --worker-class gthread \
    --timeout 300 \
    --graceful-timeout 300 \
    "app.wsgi:app"
|
||||||
94
openapi/openapi.yaml
Normal file
94
openapi/openapi.yaml
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
openapi: 3.0.3
|
||||||
|
info:
|
||||||
|
title: URL Sandbox API
|
||||||
|
version: 0.1.0
|
||||||
|
description: API for analyzing and extracting website artifacts.
|
||||||
|
|
||||||
|
servers:
|
||||||
|
- url: http://localhost:8000/api
|
||||||
|
description: Local development
|
||||||
|
|
||||||
|
paths:
|
||||||
|
/screenshot:
|
||||||
|
post:
|
||||||
|
summary: Capture a screenshot of a website
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- url
|
||||||
|
properties:
|
||||||
|
url:
|
||||||
|
type: string
|
||||||
|
example: "http://example.com"
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Screenshot image returned
|
||||||
|
content:
|
||||||
|
image/png: {}
|
||||||
|
'400':
|
||||||
|
description: Invalid request
|
||||||
|
|
||||||
|
/source:
|
||||||
|
post:
|
||||||
|
summary: Retrieve HTML source of a website
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- url
|
||||||
|
properties:
|
||||||
|
url:
|
||||||
|
type: string
|
||||||
|
example: "http://example.com"
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Raw HTML source
|
||||||
|
content:
|
||||||
|
text/html:
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
'400':
|
||||||
|
description: Invalid request
|
||||||
|
|
||||||
|
/analyse:
|
||||||
|
post:
|
||||||
|
summary: Run full analysis on a website
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- url
|
||||||
|
properties:
|
||||||
|
url:
|
||||||
|
type: string
|
||||||
|
example: "http://example.com"
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: JSON with enrichment and analysis results
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
url:
|
||||||
|
type: string
|
||||||
|
whois:
|
||||||
|
type: object
|
||||||
|
geoip:
|
||||||
|
type: object
|
||||||
|
flags:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
'400':
|
||||||
|
description: Invalid request
|
||||||
14
requirements.txt
Normal file
14
requirements.txt
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
Flask>=3.0.3
|
||||||
|
Jinja2>=3.1.4
|
||||||
|
Werkzeug>=3.0.3
|
||||||
|
itsdangerous>=2.2.0
|
||||||
|
click>=8.1.7
|
||||||
|
lxml>=5.3.0
|
||||||
|
playwright==1.45.0 # Playwright stack
|
||||||
|
beautifulsoup4>=4.12.3 # HTML parsing, etc.
|
||||||
|
gunicorn>=22.0.0 # Production server
|
||||||
|
python-whois # For WHOIS lookups
|
||||||
|
geoip2 # MaxMind GeoLite2 database for IP geolocation
|
||||||
|
dnspython # For DNS lookups, including A/AAAA records
|
||||||
|
ipwhois
|
||||||
|
PyYAML
|
||||||
101
sandbox.sh
Executable file
101
sandbox.sh
Executable file
@@ -0,0 +1,101 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# --- CONFIG ---
|
||||||
|
SANDBOX_STORAGE="${SANDBOX_STORAGE:-./data}"
|
||||||
|
APP_URL="${APP_URL:-http://localhost:8000}"
|
||||||
|
|
||||||
|
# --- FUNCTIONS ---
|
||||||
|
# Create the host-side storage directory and hand it to the container user.
# Requires sudo: chown targets UID/GID 1000 (pwuser inside the image).
prepare_storage() {
    echo "[*] Checking storage path: $SANDBOX_STORAGE"
    if [ ! -d "$SANDBOX_STORAGE" ]; then
        echo " -> Creating $SANDBOX_STORAGE on host"
        sudo mkdir -p "$SANDBOX_STORAGE"
    fi

    # 755: owner (pwuser) writes; group/others read-only.
    echo " -> Setting ownership to Playwright user (pwuser / UID 1000)"
    sudo chown -R 1000:1000 "$SANDBOX_STORAGE"
    sudo chmod -R 755 "$SANDBOX_STORAGE"

    echo "[+] Storage ready."
}
|
||||||
|
|
||||||
|
# Build the image and bring the stack up; pass -d as the first argument for
# detached mode, otherwise stay attached to the compose output.
start_stack() {
    prepare_storage
    echo "[*] Building Docker image..."
    docker compose build

    if [[ "${1:-}" != "-d" ]]; then
        echo "[*] Starting services (attached)..."
        docker compose up
    else
        echo "[*] Starting services in detached mode..."
        docker compose up -d
    fi
}
|
||||||
|
|
||||||
|
# Stop and remove the compose containers/networks (volumes are kept;
# contrast clean_stack, which passes -v).
stop_stack() {
    echo "[*] Stopping services..."
    docker compose down
}
|
||||||
|
|
||||||
|
# Full teardown: also deletes volumes (-v) and orphaned containers.
clean_stack() {
    echo "[*] Removing containers, networks, and volumes..."
    docker compose down -v --remove-orphans
}
|
||||||
|
|
||||||
|
# Stop then start again.
# NOTE: the stack always comes back up detached (-d), regardless of how it
# was originally started.
restart_stack() {
    stop_stack
    echo "[*] Restarting services..."
    start_stack -d
}
|
||||||
|
|
||||||
|
# Follow (-f) the combined service logs until interrupted.
logs_stack() {
    echo "[*] Showing logs (Ctrl+C to exit)..."
    docker compose logs -f
}
|
||||||
|
|
||||||
|
# Show the compose service table (names, state, ports).
status_stack() {
    echo "[*] Current service status:"
    docker compose ps
}
|
||||||
|
|
||||||
|
# Probe $APP_URL with curl (-f: fail on HTTP errors, -sS: quiet but show
# errors); exit 1 when the service is unreachable.
healthcheck_stack() {
    echo "[*] Running health check on $APP_URL ..."
    if ! curl -fsS "$APP_URL" > /dev/null; then
        echo "[!] Service is NOT reachable at $APP_URL"
        exit 1
    fi
    echo "[+] Service is healthy and reachable."
}
|
||||||
|
|
||||||
|
# --- MAIN ---
# Subcommand dispatcher; unknown or missing commands print usage and exit 1.
case "${1:-}" in
    start)
        # Drop "start" so optional flags (e.g. -d) reach start_stack.
        shift
        start_stack "$@"
        ;;
    stop)
        stop_stack
        ;;
    restart)
        restart_stack
        ;;
    clean)
        clean_stack
        ;;
    logs)
        logs_stack
        ;;
    status)
        status_stack
        ;;
    healthcheck)
        healthcheck_stack
        ;;
    *)
        echo "Usage: $0 {start [-d for detached mode] | stop | restart | clean | logs | status | healthcheck}"
        exit 1
        ;;
esac
|
||||||
Reference in New Issue
Block a user