first commit

2025-08-20 21:22:28 +00:00
commit 70d29f9f95
26 changed files with 2558 additions and 0 deletions

10
.env.example Normal file

@@ -0,0 +1,10 @@
# Flask Configuration
FLASK_ENV=production
SECRET_KEY=changeme_super_long_random_secret
PYTHONUNBUFFERED=1
# Playwright (browser automation)
PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Sandbox Storage
SANDBOX_STORAGE=/data

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
.env
/data/

34
Dockerfile Normal file

@@ -0,0 +1,34 @@
# Use the official Playwright image with browsers preinstalled
FROM mcr.microsoft.com/playwright/python:v1.45.0-jammy
# Use the non-root pwuser that ships with the base image (switch to root temporarily for setup)
USER root
# System deps (whois, dig, etc. — handy for later stages)
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
whois dnsutils iputils-ping ca-certificates \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy requirements first to leverage Docker layer caching
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code (the nested path is needed because the repo's app/ package must live at /app/app inside the image)
COPY app/ /app/app/
COPY entrypoint.sh ./entrypoint.sh
RUN chmod +x /app/entrypoint.sh
# Create data dir for screenshots/artifacts
RUN mkdir -p /data && chown -R pwuser:pwuser /data /app
USER pwuser
# Expose port
EXPOSE 8000
# Start server
ENTRYPOINT ["/app/entrypoint.sh"]

92
Readme.md Normal file

@@ -0,0 +1,92 @@
# URL Sandbox
A lightweight web-based sandbox for analyzing websites and domains.
It performs WHOIS lookups, GeoIP enrichment, script/form inspection, and provides analyst-friendly output.
---
## 🚀 Features
- **Domain & IP Enrichment**
- WHOIS lookups with fallback to raw text when fields are missing
- Explicit handling of privacy-protected WHOIS records (`N/A` or `Possible Privacy`)
- GeoIP (City, Region, Country, Latitude/Longitude)
- ASN, ISP, and network details
- **Flagged Content Analysis**
- Suspicious script detection
- Suspicious form detection
- Nested bullet-style reporting for clarity
- **Improved UX**
- Automatic addition of `http://`, `https://`, and `www.` if only a domain is provided
- Modal spinner to indicate background analysis (`Analyzing website…`)
- **Resilient GeoLite2 Database Management**
- Downloads the MaxMind GeoLite2-City database on first startup
- Checks file age and only re-downloads if older than **14 days** (configurable via an environment variable); a rough sketch of this check is shown below
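The age check can be pictured as a small helper like the one below. This is an illustrative sketch only: the database path and the `GEOIP_MAX_AGE_DAYS` variable name are assumptions, not values defined by this project.

```python
# Sketch of the "re-download if older than 14 days" check (path and env var name are illustrative).
import os
import time
from pathlib import Path

DB_PATH = Path("/data/GeoLite2-City.mmdb")                  # assumed download location
MAX_AGE_DAYS = int(os.getenv("GEOIP_MAX_AGE_DAYS", "14"))   # assumed variable name

def needs_refresh(path: Path = DB_PATH, max_age_days: int = MAX_AGE_DAYS) -> bool:
    """Return True when the GeoLite2 database is missing or older than max_age_days."""
    if not path.exists():
        return True
    age_seconds = time.time() - path.stat().st_mtime
    return age_seconds > max_age_days * 86400
```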
---
## ⚙️ Setup Instructions
### 1. Clone the Repository
```bash
git clone https://github.com/yourusername/url-sandbox.git
cd url-sandbox
```
### 2. Create a MaxMind Account & License Key
1. Go to [MaxMind GeoLite2](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data)
2. Sign up for a free account
3. Navigate to **Account > Manage License Keys**
4. Generate a new license key
### 3. Configure Environment Variables
All environment variables are loaded from a `.env` file.
1. Copy the sample file:
```bash
cp .env.example .env
```
2. Edit `.env` and set your values (see [`.env.example`](./.env.example) for available options).
Make sure to add your **MaxMind License Key** under `MAXMIND_LICENSE_KEY`.
### 4. Run with Docker Compose
```bash
docker-compose up --build
```
This will:
- Build the app
- Download the GeoLite2 database if not present or too old
- Start the web interface
---
## 📝 Example Output
**WHOIS Info**
- Registrar: MarkMonitor, Inc.
- Organization: Possible Privacy
- Creation: 1997-09-15
- Expiration: 2028-09-14
**GeoIP Info**
- IP: 172.66.159.20
- City: N/A
- Region: N/A
- Country: United States
- Coordinates: (37.751, -97.822)
- ASN: 13335
- ISP: Cloudflare, Inc.
---
## 📌 Roadmap
See [Next Steps Checklist](docs/roadmap.md) for planned features:
- Improved UI templates
- Artifact cleanup
- Proxy support (optional)
---

82
app/__init__.py Normal file

@@ -0,0 +1,82 @@
"""
app/__init__.py
Application factory and startup hooks for SneakyScope.
Responsibilities:
- Create the Flask app.
- Load settings (YAML -> dataclasses) with safe defaults.
- Initialize and load the Suspicious Rules Engine from YAML.
- Register blueprints (routes).
- Configure core paths (e.g., SANDBOX_STORAGE).
"""
import os
import logging
from pathlib import Path
from flask import Flask
# Local imports
from .utils.settings import get_settings
from .utils import io_helpers # if you need logging/setup later
from .utils import cache_db # available for future injections
from .utils.rules_engine import RuleEngine, load_rules_from_yaml # rules engine
from . import routes # blueprint
def create_app() -> Flask:
"""
Create and configure the Flask application instance.
Returns:
Flask: The configured Flask app.
"""
# Basic app object
app = Flask(__name__, template_folder="templates", static_folder="static")
# Load settings (safe fallback to defaults if file missing)
settings = get_settings()
# Secret key loaded from env
app.secret_key = os.getenv("SECRET_KEY")
# Configure storage directory (bind-mount is still handled by sandbox.sh)
sandbox_storage_default = Path("/data")
app.config["SANDBOX_STORAGE"] = str(sandbox_storage_default)
# Initialize Suspicious Rules Engine at startup
# Determine rules file path relative to this package
base_dir = Path(__file__).resolve().parent
rules_path = base_dir / "config" / "suspicious_rules.yaml"
# Create an engine instance (even if file missing, we still want an engine)
engine = RuleEngine()
# Try to load from YAML if present; log clearly if not
if rules_path.exists():
try:
loaded_rules = load_rules_from_yaml(rules_path)
# Add rules one-by-one (explicit)
for rule in loaded_rules:
engine.add_rule(rule)
app.logger.info(f"[+] Loaded {len(loaded_rules)} suspicious rules from {rules_path}")
except Exception as e:
app.logger.warning(f"[!] Failed loading rules from {rules_path}: {e}")
else:
app.logger.warning(f"[!] Rules file not found at {rules_path}. Engine will start with zero rules.")
# Store engine on app config so it is accessible via current_app
app.config["RULE_ENGINE"] = engine
# Make app name/version available for templates here if you want it globally
app.config["APP_NAME"] = settings.app.name
app.config["APP_VERSION"] = f"v{settings.app.version_major}.{settings.app.version_minor}"
# Register blueprints
app.register_blueprint(routes.bp)
# Example log line so we know we booted cleanly
app.logger.info(f"SneakyScope started: {app.config['APP_NAME']} {app.config['APP_VERSION']}")
app.logger.info(f"SANDBOX_STORAGE: {app.config['SANDBOX_STORAGE']}")
return app
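For local debugging outside Gunicorn, the factory can be driven directly; the snippet below is a minimal sketch and is not part of this commit (production serves `app.wsgi:app` through the entrypoint script).

```python
# Hypothetical local-debug runner (sketch only); production uses gunicorn with app.wsgi:app.
from app import create_app

if __name__ == "__main__":
    dev_app = create_app()
    dev_app.run(host="127.0.0.1", port=8000, debug=True)
```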

400
app/browser.py Normal file

@@ -0,0 +1,400 @@
import re
import uuid
import json
from pathlib import Path
from bs4 import BeautifulSoup
from datetime import datetime
from urllib.parse import urlparse
from typing import Dict, Any, Optional
from playwright.async_api import async_playwright, TimeoutError as PWTimeoutError
from flask import current_app # access the rule engine from app config
from app.utils.io_helpers import safe_write
from .enrichment import enrich_url
def get_rule_engine():
"""
Retrieve the rules engine instance from the Flask application config.
Returns:
RuleEngine or None: The engine if available, or None if not configured.
"""
try:
# current_app is only available during an active request context
engine = current_app.config.get("RULE_ENGINE")
return engine
except Exception:
# If called outside a Flask request context, fail gracefully
return None
def run_rule_checks(text, category):
"""
Run all rules for a given category against the provided text.
Args:
text (str): The content to test (e.g., form snippet, inline JS).
category (str): The rule category to run (e.g., 'form' or 'script').
Returns:
dict: {
"checks": [ { "rule": str, "category": str, "matched": bool, "reason": Optional[str] }, ... ],
"summary": { "matched_count": int, "total_rules": int }
}
"""
result = {
"checks": [],
"summary": {
"matched_count": 0,
"total_rules": 0
}
}
engine = get_rule_engine()
if engine is None:
# No engine configured; return empty but well-formed structure
return result
try:
# Run engine rules for the specified category
check_results = engine.run_all(text, category=category)
# Normalize results into the expected structure
total = 0
matched = 0
for item in check_results:
# item is expected to contain: rule, category, matched, reason (optional)
total = total + 1
if bool(item.get("matched")):
matched = matched + 1
normalized = {
"rule": item.get("rule"),
"category": item.get("category"),
"matched": bool(item.get("matched")),
"reason": item.get("reason")
}
result["checks"].append(normalized)
result["summary"]["matched_count"] = matched
result["summary"]["total_rules"] = total
except Exception as e:
# If anything goes wrong, keep structure and add a fake failure note
result["checks"].append({
"rule": "engine_error",
"category": category,
"matched": False,
"reason": f"Rule engine error: {e}"
})
result["summary"]["matched_count"] = 0
result["summary"]["total_rules"] = 0
return result
def analyze_forms(html: str, base_url: str):
"""
Parse forms from the page HTML and apply heuristic flags and rule-based checks.
Args:
html (str): The full page HTML.
base_url (str): The final URL of the page (used for hostname comparisons).
Returns:
list[dict]: A list of form analysis dictionaries, each including:
- action, method, inputs
- flagged (bool), flag_reasons (list[str]), status (str)
- rule_checks: dict with "checks" (list) and "summary" (dict)
"""
soup = BeautifulSoup(html, "lxml")
forms_info = []
page_hostname = urlparse(base_url).hostname
for form in soup.find_all("form"):
action = form.get("action")
method = form.get("method", "get").lower()
# Build explicit inputs list
inputs = []
for inp in form.find_all("input"):
input_name = inp.get("name")
input_type = inp.get("type", "text")
inputs.append({
"name": input_name,
"type": input_type
})
flagged_reasons = []
# No action specified
if not action or str(action).strip() == "":
flagged_reasons.append("No action specified")
# External host
else:
try:
action_host = urlparse(action).hostname
# Only flag absolute actions that point at a different host (relative actions stay on-site)
if action_host and action_host != page_hostname:
flagged_reasons.append("Submits to a different host")
except Exception:
# If hostname parsing fails, skip this condition quietly
pass
# HTTP form on HTTPS page
try:
if urlparse(action).scheme == "http" and urlparse(base_url).scheme == "https":
flagged_reasons.append("Submits over insecure HTTP")
except Exception:
# If scheme parsing fails, ignore
pass
# Hidden password / suspicious hidden inputs
for hidden in form.find_all("input", type="hidden"):
name_value = hidden.get("name") or ""
if "password" in name_value.lower():
flagged_reasons.append("Hidden password field")
flagged = bool(flagged_reasons)
# Serialize a simple form snippet for the rules engine to analyze (category='form')
snippet_lines = []
snippet_lines.append(f"action={action}")
snippet_lines.append(f"method={method}")
snippet_lines.append("inputs=")
for item in inputs:
snippet_lines.append(f" - name={item.get('name')} type={item.get('type')}")
form_snippet = "\n".join(snippet_lines)
rule_checks = run_rule_checks(form_snippet, category="form")
forms_info.append({
"action": action,
"method": method,
"inputs": inputs,
"flagged": flagged,
"flag_reasons": flagged_reasons,
"status": "flagged" if flagged else "possibly safe",
"rule_checks": rule_checks
})
return forms_info
def analyze_scripts(html: str, base_url: str = "", engine=None) -> list[dict]:
"""
Analyze <script> elements using the RuleEngine (if provided) and
lightweight built-in heuristics. Only append a record when at least
one rule or heuristic matches, and always set a sensible 'type'.
Returns list of dicts like:
{
"type": "external" | "inline" | "unknown",
"src": "...", # for external
"content_snippet": "...", # for inline
"rules": [ { "name": "...", "description": "..." }, ... ],
"heuristics": [ "reason1", "reason2", ... ]
}
"""
soup = BeautifulSoup(html, "lxml")
results: list[dict] = []
# Benign MIME types we ignore entirely
benign_types = {"application/ld+json", "application/json"}
# Suspicious file extensions for external scripts
dangerous_ext = (".vbs", ".hta")
# Inline red flags
risky_inline_patterns = [
(re.compile(r"\beval\s*\(", re.IGNORECASE), "Uses eval()"),
(re.compile(r"\bnew\s+Function\s*\(", re.IGNORECASE), "Uses Function constructor"),
(re.compile(r"\bdocument\.write\s*\(", re.IGNORECASE), "Uses document.write()"),
(re.compile(r"\bActiveXObject\s*\(", re.IGNORECASE), "Uses ActiveXObject (IE-only)"),
(re.compile(r"\batob\s*\(", re.IGNORECASE), "Uses atob() (possible obfuscation)"),
(re.compile(r"\bunescape\s*\(", re.IGNORECASE), "Uses unescape() (legacy/obfuscation)"),
(re.compile(r"\bset(?:Timeout|Interval)\s*\(\s*['\"`].+['\"`]\s*,", re.IGNORECASE),
"String passed to setTimeout/setInterval"),
(re.compile(r"[\"']?0x[0-9a-fA-F]{16,}[\"']?", re.IGNORECASE),
"Contains long hex-like constants (possible obfuscation)"),
]
base_host = urlparse(base_url).hostname or ""
for script in soup.find_all("script"):
try:
src = (script.get("src") or "").strip()
s_type_attr = (script.get("type") or "").strip().lower()
# IMPORTANT: .string is often None; get_text() is reliable
inline_text = script.get_text(strip=True) or ""
# Skip benign structured data outright
if s_type_attr in benign_types:
continue
# ---- Build facts for the rules engine
facts = {
"script_type_attr": s_type_attr or None,
"has_src": bool(src),
"src": src or None,
"attrs": dict(script.attrs),
"inline_len": len(inline_text),
"inline_preview": inline_text[:200].replace("\n", " ") if inline_text else None,
"base_url": base_url or None,
"base_hostname": base_host or None,
"src_hostname": urlparse(src).hostname if src else None,
}
# ---- Evaluate rules engine (using name/description)
engine_matches: list[dict] = []
if engine is not None:
try:
if hasattr(engine, "evaluate_script"):
matches = engine.evaluate_script(facts)
elif hasattr(engine, "evaluate"):
matches = engine.evaluate(facts)
else:
matches = []
if isinstance(matches, list):
for m in matches:
if isinstance(m, dict) and "name" in m:
engine_matches.append({
"name": m["name"],
"description": m.get("description", "")
})
elif isinstance(m, str):
engine_matches.append({"name": m, "description": ""})
except Exception as e:
engine_matches.append({"name": "Rules Engine Error", "description": str(e)})
# ---- Built-in heuristics
heuristics: list[str] = []
if src:
# Unusual URL schemes for script sources
if src.startswith(("data:", "blob:")):
heuristics.append("Script src uses data:/blob: URL")
# Dangerous extensions
for ext in dangerous_ext:
if src.lower().endswith(ext):
heuristics.append(f"External script with dangerous extension ({ext.lstrip('.')})")
break
# Third-party host hint
src_host = facts.get("src_hostname") or ""
if base_host and src_host and src_host != base_host:
heuristics.append(f"Third-party host: {src_host}")
else:
if inline_text:
for pat, why in risky_inline_patterns:
if pat.search(inline_text):
heuristics.append(why)
# ---- Only append when something matched; always set type
if engine_matches or heuristics:
record: dict = {}
if src:
record["type"] = "external"
record["src"] = src
elif inline_text:
record["type"] = "inline"
record["content_snippet"] = facts.get("inline_preview")
else:
record["type"] = "unknown"
if engine_matches:
record["rules"] = engine_matches
if heuristics:
record["heuristics"] = heuristics
results.append(record)
except Exception as e:
# Never let a single broken <script> kill the whole analysis
results.append({
"type": "unknown",
"heuristics": [f"Script analysis error: {e}"]
})
return results
async def fetch_page_artifacts(url: str, storage_dir: Path, engine=None) -> Dict[str, Any]:
"""
Fetch page artifacts and save them in a UUID-based directory.
Args:
url (str): URL to analyze.
storage_dir (Path): Base /data path.
engine: Optional rules engine instance (from app.config["RULE_ENGINE"]).
"""
run_uuid = str(uuid.uuid4())
run_dir = storage_dir / run_uuid
run_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = run_dir / "screenshot.png"
source_path = run_dir / "source.txt"
results_path = run_dir / "results.json"
redirects = []
downloads = []
scripts = []
async with async_playwright() as pw:
browser = await pw.chromium.launch(
headless=True,
args=["--no-sandbox", "--disable-dev-shm-usage", "--disable-blink-features=AutomationControlled"]
)
context = await browser.new_context(
viewport={"width": 1920, "height": 1080},
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
java_script_enabled=True,
locale="en-US"
)
page = await context.new_page()
# Event handlers
page.on("response", lambda resp: redirects.append({"status": resp.status, "url": resp.url}) if 300 <= resp.status <= 399 else None)
page.on("download", lambda d: downloads.append({"url": d.url, "suggested_filename": d.suggested_filename}))
page.on("request", lambda r: scripts.append(r.url) if r.url.endswith((".js", ".vbs", ".hta")) else None)
try:
await page.goto(url, wait_until="networkidle", timeout=60000)
final_url = page.url
await page.screenshot(path=str(screenshot_path), full_page=True)
html = await page.content()
safe_write(source_path, html)
except PWTimeoutError:
final_url = page.url
safe_write(source_path, "Page did not fully load (timeout)")
await page.screenshot(path=str(screenshot_path), full_page=True)
await context.close()
await browser.close()
html_content = source_path.read_text(encoding="utf-8")
forms_info = analyze_forms(html_content, final_url)
suspicious_scripts = analyze_scripts(html_content, base_url=final_url, engine=engine)
enrichment = enrich_url(url)
result = {
"uuid": run_uuid,
"submitted_url": url,
"final_url": final_url,
"redirects": redirects,
"downloads": downloads,
"scripts": scripts,
"forms": forms_info,
"suspicious_scripts": suspicious_scripts,
"enrichment": enrichment
}
safe_write(results_path, json.dumps(result, indent=2))
return result
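As a quick illustration of the script heuristics above, `analyze_scripts()` can be exercised on a small HTML snippet without a browser or rule engine; the markup and host names below are purely illustrative.

```python
# Sketch: exercising analyze_scripts() standalone (no Flask context or rule engine required).
from app.browser import analyze_scripts

sample_html = """
<script>eval(atob('YWxlcnQoMSk='));</script>
<script src="https://cdn.example.net/lib.js"></script>
"""
for finding in analyze_scripts(sample_html, base_url="https://example.com"):
    print(finding["type"], finding.get("src"), finding.get("heuristics"))
```

The inline script should surface the eval()/atob() heuristics, while the external script is reported as a third-party host.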

5
app/config/bec_words.yaml Normal file

@@ -0,0 +1,5 @@
words:
- "reset password"
- "open document"
- "view document"
- "verify account"

9
app/config/settings.yaml Normal file

@@ -0,0 +1,9 @@
app:
name: SneakyScope
version_major: 0
version_minor: 1
cache:
recent_runs_count: 10
whois_cache_days: 7
geoip_cache_days: 7

80
app/config/suspicious_rules.yaml Normal file

@@ -0,0 +1,80 @@
# config/suspicious_rules.yaml
# Baseline suspicious rules for SneakyScope
# Organized by category: script, form, text
# Extend these with more specific rules as needed
# --- Script Rules ---
- name: eval_usage
description: "Use of eval() in script"
category: script
type: regex
pattern: "\\beval\\("
- name: document_write
description: "Use of document.write (often abused in malicious injections)"
category: script
type: regex
pattern: "document\\.write\\("
- name: inline_event_handler
description: "Inline event handler detected (onload, onclick, etc.)"
category: script
type: regex
pattern: "on(load|click|error|mouseover|keydown)\\s*="
- name: obfuscated_encoding
description: "Suspicious use of atob() or btoa() (base64 encoding/decoding)"
category: script
type: regex
pattern: "\\b(atob|btoa)\\("
- name: suspicious_iframe
description: "Iframe usage in script (possible phishing/malvertising)"
category: script
type: regex
pattern: "<iframe[^>]*>"
# --- Form Rules ---
- name: suspicious_form_action
description: "Form action with external URL (potential credential exfiltration)"
category: form
type: regex
pattern: "<form[^>]*action=['\"]http"
- name: hidden_inputs
description: "Form with hidden inputs (possible credential harvesting)"
category: form
type: regex
pattern: "<input[^>]*type=['\"]hidden"
- name: password_field
description: "Form requesting password field"
category: form
type: regex
pattern: "<input[^>]*type=['\"]password"
# --- Text Rules (Social Engineering / BEC) ---
- name: urgent_request
description: "Language suggesting urgency (common in phishing/BEC)"
category: text
type: regex
pattern: "(urgent|immediately|asap|action required)"
- name: account_suspension
description: "Threat of account suspension/closure"
category: text
type: regex
pattern: "(account.*suspend|account.*close|verify.*account)"
- name: financial_request
description: "Request for gift cards, wire transfer, or money"
category: text
type: regex
pattern: "(gift card|wire transfer|bank account|bitcoin|payment required)"
- name: credential_reset
description: "Password reset or credential reset wording"
category: text
type: regex
pattern: "(reset password|update credentials|login to verify)"

137
app/enrichment.py Normal file
View File

@@ -0,0 +1,137 @@
import logging
from pathlib import Path
from urllib.parse import urlparse
import requests
import yaml
import whois
from datetime import datetime
from ipaddress import ip_address
import socket
# Local imports
from .utils.cache_db import get_cache
from .utils.settings import get_settings
# Configure logging
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
# Init cache
cache = get_cache("/data/cache.db")
settings = get_settings()
# Load BEC words
BEC_WORDS_FILE = Path(__file__).parent / "config" / "bec_words.yaml"  # app/config/, alongside settings.yaml and suspicious_rules.yaml
if BEC_WORDS_FILE.exists():
with open(BEC_WORDS_FILE, "r", encoding="utf-8") as f:
BEC_WORDS = yaml.safe_load(f).get("words", [])
else:
BEC_WORDS = []
# Cache TTLs are expressed in minutes (CacheDB expects minutes)
MINUTES_PER_DAY = 24 * 60
GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * MINUTES_PER_DAY
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * MINUTES_PER_DAY
def enrich_url(url: str) -> dict:
"""Perform WHOIS, GeoIP, and BEC word enrichment."""
result = {}
# Extract hostname
parsed = urlparse(url)
hostname = parsed.hostname or url # fallback if parsing fails
# --- WHOIS ---
result.update(enrich_whois(hostname))
# --- GeoIP ---
result["geoip"] = enrich_geoip(hostname)
# --- BEC Words ---
result["bec_words"] = [w for w in BEC_WORDS if w.lower() in url.lower()]
return result
def enrich_whois(hostname: str) -> dict:
"""Fetch WHOIS info using python-whois with safe type handling."""
cache_key = f"whois:{hostname}"
cached = cache.read(cache_key)
if cached:
logging.info(f"[CACHE HIT] for WHOIS: {hostname}")
return cached
logging.info(f"[CACHE MISS] for WHOIS: {hostname}")
result = {}
try:
w = whois.whois(hostname)
def format_dt(val):
if isinstance(val, list):
return ", ".join([v.strftime("%Y-%m-%d %H:%M:%S") if isinstance(v, datetime) else str(v) for v in val])
elif isinstance(val, datetime):
return val.strftime("%Y-%m-%d %H:%M:%S")
elif val is None:
return "Possible Privacy"
else:
return str(val)
result["whois"] = {
"registrar": format_dt(getattr(w, "registrar", None)),
"creation_date": format_dt(getattr(w, "creation_date", None)),
"expiration_date": format_dt(getattr(w, "expiration_date", None)),
"owner": format_dt(getattr(w, "org", None))
}
except Exception as e:
logging.warning(f"WHOIS lookup failed for {hostname}: {e}")
try:
# fallback raw whois text
import subprocess
raw_output = subprocess.check_output(["whois", hostname], encoding="utf-8", errors="ignore")
result["whois"] = {}
result["raw_whois"] = raw_output
except Exception as raw_e:
logging.error(f"Raw WHOIS also failed: {raw_e}")
result["whois"] = {}
result["raw_whois"] = "N/A"
cache.create(cache_key, result, WHOIS_DEFAULT_TTL)
return result
def enrich_geoip(hostname: str) -> dict:
"""Resolve hostname to IPs and fetch info from ip-api.com."""
geo_info = {}
ips = extract_ips_from_url(hostname)
for ip in ips:
ip_str = str(ip)
cache_key = f"geoip:{ip_str}"
cached = cache.read(cache_key)
if cached:
logging.info(f"[CACHE HIT] for GEOIP: {ip}")
geo_info[ip_str] = cached
continue
logging.info(f"[CACHE MISS] for GEOIP: {ip}")
try:
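# Note: 'fields' is ip-api.com's numeric field mask, a compact way of selecting which JSON fields the API returns (see ip-api's field builder for exactly what this value encodes).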
resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5)
if resp.status_code == 200:
geo_info[ip_str] = resp.json()
else:
geo_info[ip_str] = {"error": f"HTTP {resp.status_code}"}
except Exception as e:
geo_info[ip_str] = {"error": str(e)}
cache.create(cache_key, geo_info[ip_str], GEOIP_DEFAULT_TTL)
return geo_info
def extract_ips_from_url(hostname: str):
"""Resolve hostname to IPs."""
try:
info = socket.getaddrinfo(hostname, None)
return list({ip_address(x[4][0]) for x in info})
except Exception:
return []

125
app/routes.py Normal file

@@ -0,0 +1,125 @@
import os
import json
import asyncio
from pathlib import Path
from datetime import datetime
from flask import Blueprint, render_template, request, redirect, url_for, flash, current_app, send_file, abort
from .browser import fetch_page_artifacts
from .enrichment import enrich_url
from .utils.settings import get_settings
from .utils.io_helpers import get_recent_results
bp = Blueprint("main", __name__)
settings = get_settings()
app_name = settings.app.name
app_version = f"v{settings.app.version_major}.{settings.app.version_minor}"
# --- context processor ---
@bp.context_processor
def inject_app_info():
"""Inject app name and version into all templates."""
return {
"app_name": app_name,
"app_version": app_version
}
@bp.route("/", methods=["GET"])
def index():
"""
Render the landing page with optional 'recent_results' list.
The number of recent runs is controlled via settings.cache.recent_runs_count (int).
Falls back to 10 if not present or invalid.
"""
# Resolve SANDBOX_STORAGE from app config
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
# Pull recent count from settings with a safe fallback
try:
# settings is already initialized at module import in your file
recent_count = int(getattr(settings.cache, "recent_runs_count", 10))
if recent_count < 0:
recent_count = 0
except Exception:
recent_count = 10
# Build the recent list (non-fatal if storage is empty or unreadable)
recent_results = get_recent_results(storage, recent_count, current_app.logger)
# Pass to template; your index.html will hide the card if list is empty
return render_template("index.html", recent_results=recent_results)
@bp.route("/analyze", methods=["POST"])
def analyze():
url = request.form.get("url", "").strip()
current_app.logger.info(f"[*] Analyzing {url}")
if not url:
flash("Please enter a URL.", "error")
return redirect(url_for("main.index"))
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
storage.mkdir(parents=True, exist_ok=True)
try:
engine = current_app.config.get("RULE_ENGINE")
result = asyncio.run(fetch_page_artifacts(url, storage, engine=engine))
# result = asyncio.run(fetch_page_artifacts(url, storage))
current_app.logger.info(f"[+] Analysis done for {url}")
except Exception as e:
flash(f"Analysis failed: {e}", "error")
current_app.logger.error(f"Analysis failed for {url}: {e}")
return redirect(url_for("main.index"))
# Add enrichment safely
try:
enrichment = enrich_url(url)
result["enrichment"] = enrichment
current_app.logger.info(f"[+] Enrichment added for {url}")
except Exception as e:
result["enrichment"] = {}
current_app.logger.warning(f"[!] Enrichment failed for {url}: {e}")
# Redirect to permalink page for this run
return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
@bp.route("/results/<run_uuid>", methods=["GET"])
def view_result(run_uuid: str):
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
run_dir = storage / run_uuid
results_path = run_dir / "results.json"
if not results_path.exists():
current_app.logger.error(f"Results not found for UUID: {run_uuid}")
abort(404)
with open(results_path, "r", encoding="utf-8") as f:
result = json.load(f)
# Pass the UUID to the template for artifact links
result["uuid"] = run_uuid
return render_template("result.html", **result)
@bp.route("/artifacts/<run_uuid>/<filename>", methods=["GET"])
def artifacts(run_uuid: str, filename: str):
storage = Path(current_app.config["SANDBOX_STORAGE"]).resolve()
run_dir = storage / run_uuid
full_path = (run_dir / filename).resolve()
# Prevent directory traversal (resolve first so '..' segments cannot escape run_dir)
try:
full_path.relative_to(run_dir.resolve())
except ValueError:
current_app.logger.warning(f"Directory traversal attempt: {filename}")
abort(404)
if not full_path.exists():
current_app.logger.error(f"Artifact not found: {filename} for UUID {run_uuid}")
abort(404)
return send_file(full_path)

288
app/static/style.css Normal file

@@ -0,0 +1,288 @@
:root {
font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
}
body {
margin: 0;
background: #0b0f14;
color: #e6edf3;
}
header, footer {
padding: 1rem 1.25rem;
background: #0f1720;
border-bottom: 1px solid #1f2a36;
}
/* ===== main: now full-width (no 960px cap) ===== */
main {
padding: 1.5rem 2rem; /* a bit more horizontal breathing room */
max-width: 100%; /* remove fixed cap */
width: 100%;
margin: 0; /* no auto centering since we're full-width */
box-sizing: border-box;
}
.card {
background: #111826;
padding: 1rem;
border: 1px solid #1f2a36;
border-radius: 12px;
margin-bottom: 1rem;
}
label {
display: block;
margin-bottom: 0.5rem;
}
input[type=url] {
width: 100%;
padding: 0.7rem;
border-radius: 8px;
border: 1px solid #243041;
background: #0b1220;
color: #e6edf3;
}
button, .button {
display: inline-block;
margin-top: 0.75rem;
padding: 0.6rem 1rem;
border-radius: 8px;
border: 1px solid #243041;
background: #1a2535;
color: #e6edf3;
text-decoration: none;
}
.flash {
list-style: none;
padding: 0.5rem 1rem;
}
.flash .error {
color: #ff6b6b;
}
.grid {
display: grid;
grid-template-columns: 150px 1fr;
gap: 0.5rem 1rem;
}
img {
max-width: 100%;
height: auto;
border-radius: 8px;
border: 1px solid #243041;
}
pre.code {
white-space: pre-wrap;
word-break: break-all;
background: #0b1220;
padding: 0.75rem;
border-radius: 8px;
border: 1px solid #243041;
}
/* Links */
a {
color: #7dd3fc; /* Soft cyan for dark background */
text-decoration: underline;
}
a:hover {
color: #38bdf8; /* Slightly brighter on hover */
}
/* Accordion / details summary */
details summary {
cursor: pointer;
padding: 0.5rem;
font-weight: bold;
border-radius: 8px;
background: #111826;
border: 1px solid #1f2a36;
margin-bottom: 0.5rem;
transition: background 0.3s ease;
}
details[open] summary {
background: #1a2535; /* Slightly lighter when expanded */
}
details > ul, details > table {
padding-left: 1rem;
margin: 0.5rem 0;
}
/* Highlight flagged forms */
details.flagged summary {
border-left: 4px solid #ff6b6b; /* Red accent for flagged forms */
}
/* Smooth collapse/expand */
details ul, details p {
transition: all 0.3s ease;
}
/* Enrichment / GeoIP / Forms / Redirects Tables */
.enrichment-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 1rem;
}
.enrichment-table th,
.enrichment-table td {
border: 1px solid #243041;
padding: 0.5rem;
vertical-align: top;
}
.enrichment-table th {
background: #111826;
text-align: left;
}
.enrichment-table td {
width: auto; /* browser resizes naturally */
word-break: break-word;
}
/* Scripts Table Special Handling */
.scripts-table pre.code {
margin: 0;
padding: 0.25rem;
font-size: 0.9rem;
}
/* Hover effects for table rows */
.enrichment-table tbody tr:hover {
background: #1f2a36;
}
/* Card table headings */
.enrichment-table thead th {
border-bottom: 2px solid #243041;
}
/* Ensure nested tables don't overflow */
.card table {
table-layout: auto;
word-break: break-word;
}
/* ============================
Results Table (3+ columns)
- Visual style matches .enrichment-table
- Adds better wrapping for long strings (URL/UUID)
- Right-aligns timestamps for scannability
============================ */
.results-table {
width: 100%;
border-collapse: collapse;
background: #111826; /* match card background */
border: 1px solid #1f2a36; /* subtle border like cards */
border-radius: 12px; /* rounded corners */
overflow: hidden; /* clip the rounded corners */
table-layout: auto; /* allow natural column sizing */
}
/* Header styling */
.results-table thead th {
padding: 0.6rem 0.75rem;
background: #0f1720; /* match header tone */
border-bottom: 1px solid #1f2a36;
text-align: left;
font-weight: 600;
white-space: nowrap; /* keep short headers on one line */
}
/* Body cells */
.results-table tbody td {
padding: 0.6rem 0.75rem;
border-top: 1px solid #1f2a36;
vertical-align: top;
text-align: left;
}
/* Zebra rows for readability (optional) */
.results-table tbody tr:nth-child(odd) {
background: #0d1522; /* slight contrast row */
}
/* Links inside table should inherit your global link colors */
.results-table a {
text-decoration: underline;
}
/* ---- Column-specific tweaks ---- */
/* URL column: allow wrapping of long URLs without blowing the layout */
.results-table td.url,
.results-table td.url a {
word-wrap: break-word; /* legacy support */
overflow-wrap: anywhere; /* modern wrapping for long URLs */
word-break: break-word;
}
/* UUID column: force wrap to avoid overflow */
.results-table td.uuid {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
word-break: break-all; /* split at any point to keep table narrow */
max-width: 28ch; /* reasonable width to avoid stretching */
}
/* Timestamp column: align right and keep on a single line */
.results-table td.timestamp {
text-align: right;
white-space: nowrap; /* keep ISO timestamps on one line */
}
/* Optional: make the newest (first) row stand out subtly */
.results-table tbody tr:first-child {
box-shadow: inset 0 0 0 1px #243041;
}
/* Optional: small, subtle buttons in table cells (e.g., copy UUID) */
.results-table .copy-btn {
margin-left: 0.4rem;
padding: 0.2rem 0.45rem;
border-radius: 6px;
border: 1px solid #243041;
background: #1a2535;
color: #e6edf3;
cursor: pointer;
line-height: 1;
font-size: 0.9rem;
}
.results-table .copy-btn:hover {
filter: brightness(1.1);
}
/* ===== Responsive niceties for very small screens ===== */
@media (max-width: 768px) {
main {
padding: 1rem; /* a tad tighter on mobile */
}
.enrichment-table,
.results-table {
display: block;
overflow-x: auto; /* allow horizontal scroll if needed */
white-space: nowrap;
}
}
.scripts-table td ul {
margin: 0.25rem 0 0.25rem 1rem;
padding-left: 1rem;
}
.scripts-table td small {
opacity: 0.85;
}

33
app/templates/base.html Normal file

@@ -0,0 +1,33 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{{ app_name }} {{ app_version }}</title>
<link rel="stylesheet" href="https://unpkg.com/sanitize.css" />
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}" />
</head>
<body>
<header>
<h1>{{ app_name }} {{ app_version }}</h1>
</header>
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
<ul class="flash">
{% for category, message in messages %}
<li class="{{ category }}">{{ message }}</li>
{% endfor %}
</ul>
{% endif %}
{% endwith %}
<main>
{% block content %}{% endblock %}
</main>
<footer>
<small>{{ app_name }} - A self-hosted URL analysis sandbox - {{ app_version }}</small>
</footer>
</body>
</html>

149
app/templates/index.html Normal file

@@ -0,0 +1,149 @@
{% extends 'base.html' %}
{% block content %}
<!-- Analysis Form -->
<form id="analyze-form" method="post" action="{{ url_for('main.analyze') }}" class="card">
<h2>Analyze a URL</h2>
<label for="url">Enter a URL to analyze</label>
<input id="url" name="url" type="url" placeholder="https://example.com" required />
<button type="submit">Analyze</button>
</form>
<!-- Recent Results (optional; shown only if recent_results provided) -->
{% if recent_results %}
<div class="card" id="recent-results">
<h2>Recent Results</h2>
<table class="results-table">
<thead>
<tr>
<th>Timestamp</th>
<th>URL</th>
<th>UUID</th>
</tr>
</thead>
<tbody>
{% for r in recent_results %}
<tr>
<td class="timestamp">
{% if r.timestamp %}
{{ r.timestamp }}
{% else %}
N/A
{% endif %}
</td>
<td class="url">
<a href="{{ url_for('main.view_result', run_uuid=r.uuid) }}">
{{ r.final_url or r.submitted_url }}
</a>
</td>
<td class="uuid">
<code id="uuid-{{ loop.index }}">{{ r.uuid }}</code>
<button
type="button"
class="copy-btn"
data-target="uuid-{{ loop.index }}">
📋
</button>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
<!-- Spinner Modal -->
<div id="spinner-modal" style="
display:none;
opacity:0;
position:fixed;
top:0;
left:0;
width:100%;
height:100%;
background:rgba(0,0,0,0.7);
color:#fff;
font-size:1.5rem;
text-align:center;
padding-top:20%;
z-index:9999;
transition: opacity 0.3s ease;
">
<div>
<div class="loader" style="
border: 8px solid #f3f3f3;
border-top: 8px solid #1a2535;
border-radius: 50%;
width: 60px;
height: 60px;
animation: spin 1s linear infinite;
margin: 0 auto 1rem auto;
"></div>
Analyzing website…
</div>
</div>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
<script>
const form = document.getElementById('analyze-form');
const modal = document.getElementById('spinner-modal');
function showModal() {
modal.style.display = 'block';
requestAnimationFrame(() => {
modal.style.opacity = '1';
});
}
function hideModal() {
modal.style.opacity = '0';
modal.addEventListener('transitionend', () => {
modal.style.display = 'none';
}, { once: true });
}
// Hide spinner on initial load / back navigation
window.addEventListener('pageshow', () => {
modal.style.opacity = '0';
modal.style.display = 'none';
});
form.addEventListener('submit', (e) => {
showModal();
// Prevent double submission
form.querySelector('button').disabled = true;
// Allow browser to render the modal before submitting
requestAnimationFrame(() => form.submit());
e.preventDefault();
});
</script>
<script>
document.addEventListener('DOMContentLoaded', () => {
const buttons = document.querySelectorAll('.copy-btn');
buttons.forEach(btn => {
btn.addEventListener('click', () => {
const targetId = btn.getAttribute('data-target');
const uuidText = document.getElementById(targetId).innerText;
navigator.clipboard.writeText(uuidText).then(() => {
// Give quick feedback
btn.textContent = '✅';
setTimeout(() => { btn.textContent = '📋'; }, 1500);
}).catch(err => {
console.error('Failed to copy UUID:', err);
});
});
});
});
</script>
{% endblock %}

268
app/templates/result.html Normal file

@@ -0,0 +1,268 @@
{% extends "base.html" %}
{% block content %}
<!-- Top Jump List -->
<div class="card" id="top-jump-list">
<h2>Jump to Section</h2>
<ul>
<li><a href="/">Analyse Another Page</a></li>
<li><a href="#url-overview">URL Overview</a></li>
<li><a href="#enrichment">Enrichment</a></li>
<li><a href="#redirects">Redirects</a></li>
<li><a href="#forms">Forms</a></li>
<li><a href="#scripts">Suspicious Scripts</a></li>
<li><a href="#screenshot">Screenshot</a></li>
<li><a href="#source">Source</a></li>
</ul>
</div>
<!-- URL Overview -->
<div class="card" id="url-overview">
<h2>URL Overview</h2>
<p><strong>Submitted URL:</strong> {{ submitted_url }}</p>
<p><strong>Final URL:</strong> <a href="{{ final_url }}" target="_blank">{{ final_url }}</a></p>
<p><strong>Permalink:</strong>
<a href="{{ url_for('main.view_result', run_uuid=uuid, _external=True) }}">
{{ request.host_url }}results/{{ uuid }}
</a>
</p>
<p><a href="#top-jump-list">Back to top</a></p>
</div>
<!-- Enrichment -->
<div class="card" id="enrichment">
<h2>Enrichment</h2>
<!-- WHOIS -->
{% if enrichment.whois %}
<h3>WHOIS</h3>
<table class="enrichment-table">
<thead>
<tr>
<th>Field</th>
<th>Value</th>
</tr>
</thead>
<tbody>
{% for k, v in enrichment.whois.items() %}
<tr>
<td>{{ k.replace('_', ' ').title() }}</td>
<td>{{ v }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endif %}
{% if enrichment.raw_whois %}
<h3>Raw WHOIS</h3>
<pre class="code">{{ enrichment.raw_whois }}</pre>
{% endif %}
<!-- GeoIP / IP-API -->
{% if enrichment.geoip %}
<h3>GeoIP</h3>
{% for ip, info in enrichment.geoip.items() %}
<details class="card" style="padding:0.5rem; margin-bottom:0.5rem;">
<summary>{{ ip }}</summary>
<table class="enrichment-table">
<tbody>
{% for key, val in info.items() %}
<tr>
<td>{{ key.replace('_', ' ').title() }}</td>
<td>{{ val }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</details>
{% endfor %}
{% endif %}
<!-- BEC Words -->
{% if enrichment.bec_words %}
<h3>BEC Words Detected</h3>
<table class="enrichment-table">
<thead>
<tr><th>Word</th></tr>
</thead>
<tbody>
{% for word in enrichment.bec_words %}
<tr><td>{{ word }}</td></tr>
{% endfor %}
</tbody>
</table>
{% endif %}
{% if not enrichment.whois and not enrichment.raw_whois and not enrichment.geoip and not enrichment.bec_words %}
<p>No enrichment data available.</p>
{% endif %}
<p><a href="#top-jump-list">Back to top</a></p>
</div>
<!-- Redirects -->
<div class="card" id="redirects">
<h2>Redirects</h2>
{% if redirects %}
<table class="enrichment-table">
<thead>
<tr>
<th>Status</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for r in redirects %}
<tr>
<td>{{ r.status }}</td>
<td><a href="{{ r.url }}" target="_blank">{{ r.url }}</a></td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p>No redirects detected.</p>
{% endif %}
<p><a href="#top-jump-list">Back to top</a></p>
</div>
<!-- Forms -->
<div class="card" id="forms">
<h2>Forms</h2>
{% if forms %}
{% for form in forms %}
<details class="card {% if form.flagged %}flagged{% endif %}" style="padding:0.5rem; margin-bottom:0.5rem;">
<summary>{{ form.status }} — Action: {{ form.action }} ({{ form.method | upper }})</summary>
<table class="enrichment-table">
<thead>
<tr>
<th>Input Name</th>
<th>Type</th>
</tr>
</thead>
<tbody>
{% for inp in form.inputs %}
<tr>
<td>{{ inp.name }}</td>
<td>{{ inp.type }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% if form.flagged %}
<p><strong>Flag Reasons:</strong></p>
<ul>
{% for reason in form.flag_reasons %}
<li>{{ reason }}</li>
{% endfor %}
</ul>
{% endif %}
</details>
{% endfor %}
{% else %}
<p>No forms detected.</p>
{% endif %}
<p><a href="#top-jump-list">Back to top</a></p>
</div>
<!-- Suspicious Scripts -->
<div class="card" id="scripts">
<h2>Suspicious Scripts</h2>
{% if suspicious_scripts %}
<table class="enrichment-table scripts-table">
<thead>
<tr>
<th>Type</th>
<th>Source URL</th>
<th>Content Snippet</th>
<th>Matches (Rules & Heuristics)</th>
</tr>
</thead>
<tbody>
{% for s in suspicious_scripts %}
<tr>
<!-- Type -->
<td>{{ s.type or 'unknown' }}</td>
<!-- Source URL -->
<td>
{% if s.src %}
<a href="{{ s.src }}" target="_blank">{{ s.src }}</a>
{% else %}
N/A
{% endif %}
</td>
<!-- Inline content snippet (collapsible) -->
<td>
{% if s.content_snippet %}
<details>
<summary>View snippet</summary>
<pre class="code">{{ s.content_snippet }}</pre>
</details>
{% else %}
N/A
{% endif %}
</td>
<!-- Rules & Heuristics -->
<td>
{% set has_rules = s.rules and s.rules|length > 0 %}
{% set has_heur = s.heuristics and s.heuristics|length > 0 %}
{% if has_rules %}
<strong>Rules</strong>
<ul>
{% for r in s.rules %}
<li title="{{ r.description or '' }}">
{{ r.name }}
{% if r.description %}
<small>— {{ r.description }}</small>
{% endif %}
</li>
{% endfor %}
</ul>
{% endif %}
{% if has_heur %}
<strong>Heuristics</strong>
<ul>
{% for h in s.heuristics %}
<li>{{ h }}</li>
{% endfor %}
</ul>
{% endif %}
{% if not has_rules and not has_heur %}
N/A
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p>No suspicious scripts detected.</p>
{% endif %}
<p><a href="#top-jump-list">Back to top</a></p>
</div>
<!-- Screenshot -->
<div class="card" id="screenshot">
<h2>Screenshot</h2>
<img src="{{ url_for('main.artifacts', run_uuid=uuid, filename='screenshot.png') }}" alt="Screenshot">
<p><a href="#top-jump-list">Back to top</a></p>
</div>
<!-- Source -->
<div class="card" id="source">
<h2>Source</h2>
<p><a href="{{ url_for('main.artifacts', run_uuid=uuid, filename='source.txt') }}" target="_blank">View Source</a></p>
<p><a href="#top-jump-list">Back to top</a></p>
</div>
{% endblock %}

128
app/utils/cache_db.py Normal file

@@ -0,0 +1,128 @@
import json
import time
import sqlite3
import threading
import functools
from pathlib import Path
from typing import Any, Optional
# ---------- SINGLETON DECORATOR ----------
T = Any
def singleton_loader(func):
"""Ensure only one cache instance exists."""
cache: dict[str, T] = {}
lock = threading.Lock()
@functools.wraps(func)
def wrapper(*args, **kwargs) -> T:
with lock:
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
# ---------- CACHE CLASS ----------
class CacheDB:
"""SQLite-backed cache with expiration in minutes, CRUD, auto-cleanup, singleton support."""
TABLE_NAME = "cache"
def __init__(self, db_path: str | Path = "cache.db", default_expiration_minutes: int = 1440):
"""
:param default_expiration_minutes: default expiration in minutes (default 24 hours)
"""
self.db_path = Path(db_path)
self.default_expiration = default_expiration_minutes * 60 # convert minutes -> seconds
self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
self.conn.row_factory = sqlite3.Row
self._lock = threading.Lock()
self._create_table()
def _create_table(self):
"""Create the cache table if it doesn't exist."""
with self._lock:
self.conn.execute(f"""
CREATE TABLE IF NOT EXISTS {self.TABLE_NAME} (
key TEXT PRIMARY KEY,
value TEXT,
expires_at INTEGER
)
""")
self.conn.commit()
def _cleanup_expired(self):
"""Delete expired rows."""
now = int(time.time())
with self._lock:
self.conn.execute(
f"DELETE FROM {self.TABLE_NAME} WHERE expires_at IS NOT NULL AND expires_at < ?", (now,)
)
self.conn.commit()
# ---------- CRUD ----------
def create(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
"""Insert or update a cache entry. expires_in_minutes overrides default expiration."""
self._cleanup_expired()
if expires_in_minutes is None:
expires_in_seconds = self.default_expiration
else:
expires_in_seconds = expires_in_minutes * 60
expires_at = int(time.time()) + expires_in_seconds
value_json = json.dumps(value)
with self._lock:
self.conn.execute(
f"INSERT OR REPLACE INTO {self.TABLE_NAME} (key, value, expires_at) VALUES (?, ?, ?)",
(key, value_json, expires_at)
)
self.conn.commit()
def read(self, key: str) -> Optional[Any]:
"""Read a cache entry. Auto-cleans expired items."""
self._cleanup_expired()
with self._lock:
row = self.conn.execute(
f"SELECT * FROM {self.TABLE_NAME} WHERE key = ?", (key,)
).fetchone()
if not row:
return None
return json.loads(row["value"])
def update(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
"""Update a cache entry. Optional expiration in minutes."""
if expires_in_minutes is None:
expires_in_seconds = self.default_expiration
else:
expires_in_seconds = expires_in_minutes * 60
expires_at = int(time.time()) + expires_in_seconds
value_json = json.dumps(value)
with self._lock:
self.conn.execute(
f"UPDATE {self.TABLE_NAME} SET value = ?, expires_at = ? WHERE key = ?",
(value_json, expires_at, key)
)
self.conn.commit()
def delete(self, key: str):
with self._lock:
self.conn.execute(f"DELETE FROM {self.TABLE_NAME} WHERE key = ?", (key,))
self.conn.commit()
def clear(self):
"""Delete all rows from the cache table."""
with self._lock:
self.conn.execute(f"DELETE FROM {self.TABLE_NAME}")
self.conn.commit()
def close(self):
self.conn.close()
# ---------- SINGLETON INSTANCE ----------
@singleton_loader
def get_cache(db_path: str = "cache.db", default_expiration_minutes: int = 1440) -> CacheDB:
return CacheDB(db_path=db_path, default_expiration_minutes=default_expiration_minutes)
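A minimal usage sketch of the cache singleton follows; the database path and keys are illustrative.

```python
# Sketch: basic CRUD against the cache (db path and keys are illustrative).
from app.utils.cache_db import get_cache

cache = get_cache("/tmp/demo-cache.db", default_expiration_minutes=60)
cache.create("whois:example.com", {"registrar": "Example Registrar"}, expires_in_minutes=5)
print(cache.read("whois:example.com"))   # -> {'registrar': 'Example Registrar'}
cache.delete("whois:example.com")
```

Because `get_cache` is wrapped by `singleton_loader`, subsequent calls return the same instance regardless of the arguments passed.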

115
app/utils/io_helpers.py Normal file

@@ -0,0 +1,115 @@
import json
import logging
from pathlib import Path
from datetime import datetime
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
"""Write content to a file safely with logging."""
path = Path(path)
try:
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, mode, encoding=encoding) as f:
f.write(content)
logging.info(f"[+] Wrote file: {path}")
except Exception as e:
logging.error(f"[!] Failed writing {path}: {e}")
raise
def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
"""
Scan the SANDBOX_STORAGE directory for run folders (UUIDs), read each
run's results.json, and return the most recent N entries by file mtime.
Args:
storage_dir (Path): Base path where UUID run directories live.
limit (int): Maximum number of recent items to return.
logger: Flask or stdlib logger to record non-fatal issues.
Returns:
list[dict]: Each item includes:
{
"uuid": str,
"submitted_url": str | None,
"final_url": str | None,
"timestamp": str (ISO 8601),
}
Returns an empty list if no runs are found or on error.
"""
items = []
try:
# Ensure the storage dir exists
storage_dir.mkdir(parents=True, exist_ok=True)
# Iterate directories directly under storage_dir
for entry in storage_dir.iterdir():
try:
if not entry.is_dir():
# Skip non-directories
continue
# Expect results.json inside each UUID directory
results_path = entry / "results.json"
if not results_path.exists():
# Skip folders without results.json
continue
# Read file metadata (mtime) for sorting and display
stat_info = results_path.stat()
mtime_epoch = stat_info.st_mtime
mtime_iso = datetime.fromtimestamp(mtime_epoch).isoformat(timespec="seconds")
# Parse a small subset of the JSON for display
submitted_url = None
final_url = None
run_uuid = entry.name
try:
with open(results_path, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict):
submitted_url = data.get("submitted_url")
final_url = data.get("final_url")
except Exception as read_err:
# If JSON is malformed or unreadable, log and continue
if logger:
logger.warning(f"[recent] Failed reading {results_path}: {read_err}")
item = {
"uuid": run_uuid,
"submitted_url": submitted_url,
"final_url": final_url,
"timestamp": mtime_iso
}
items.append((mtime_epoch, item))
except Exception as inner_err:
# Keep going; a single bad folder should not break the list
if logger:
logger.warning(f"[recent] Skipping {entry}: {inner_err}")
# Sort by mtime desc
try:
items.sort(key=lambda t: t[0], reverse=True)
except Exception as sort_err:
if logger:
logger.warning(f"[recent] Sort failed: {sort_err}")
# Trim to limit without list comprehensions
trimmed = []
count = 0
for tup in items:
if count >= limit:
break
trimmed.append(tup[1])
count = count + 1
return trimmed
except Exception as outer_err:
if logger:
logger.error(f"[recent] Unexpected error while scanning {storage_dir}: {outer_err}")
return []

132
app/utils/rules_engine.py Normal file

@@ -0,0 +1,132 @@
"""
rules_engine.py
A flexible rule-based engine for detecting suspicious patterns in scripts, forms,
or other web artifacts inside SneakyScope.
Each rule is defined as:
- name: str # Rule identifier
- description: str # Human-readable reason for analysts
- category: str # e.g., 'script', 'form', 'text', 'generic'
- type: str # 'regex' or 'function'
- pattern: str # Regex pattern (if type=regex)
- function: callable # Python function returning (bool, str) (if type=function)
The framework returns a list of results, with pass/fail and reasoning.
"""
import re
from pathlib import Path
from typing import Callable, Dict, List, Tuple, Union
import yaml
class Rule:
"""Represents a single detection rule."""
def __init__(
self,
name: str,
description: str,
category: str,
rule_type: str = "regex",
pattern: str = None,
function: Callable = None,
):
self.name = name
self.description = description
self.category = category
self.rule_type = rule_type
self.pattern = pattern
self.function = function
def run(self, text: str) -> Tuple[bool, str]:
"""
Run the rule on given text.
Returns:
(matched: bool, reason: str)
"""
if self.rule_type == "regex" and self.pattern:
if re.search(self.pattern, text, re.IGNORECASE):
return True, f"Matched regex '{self.pattern}'{self.description}"
else:
return False, "No match"
elif self.rule_type == "function" and callable(self.function):
return self.function(text)
else:
return False, "Invalid rule configuration"
class RuleEngine:
"""Loads and executes rules against provided text."""
def __init__(self, rules: List[Rule] = None):
self.rules = rules or []
def add_rule(self, rule: Rule):
"""Add a new rule at runtime."""
self.rules.append(rule)
def run_all(self, text: str, category: str = None) -> List[Dict]:
"""
Run all rules against text.
Args:
text: str → the content to test
category: str → optional, only run rules in this category
Returns:
List of dicts with rule results.
"""
results = []
for rule in self.rules:
if category and rule.category != category:
continue
matched, reason = rule.run(text)
results.append(
{
"rule": rule.name,
"category": rule.category,
"matched": matched,
"reason": reason if matched else None,
}
)
return results
def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
"""
Load rules from a YAML file.
Example YAML format:
- name: suspicious_eval
description: "Use of eval() in script"
category: script
type: regex
pattern: "\\beval\\("
- name: password_reset
description: "Password reset wording"
category: text
type: regex
pattern: "reset password"
"""
rules = []
with open(yaml_file, "r", encoding="utf-8") as f:
data = yaml.safe_load(f)
for item in data:
rule = Rule(
name=item["name"],
description=item["description"],
category=item["category"],
rule_type=item.get("type", "regex"),
pattern=item.get("pattern"),
)
rules.append(rule)
return rules
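A minimal sketch of driving the engine with the bundled YAML (run from the repository root so the relative path resolves):

```python
# Sketch: load the bundled rules and run the 'script' category against a snippet.
from pathlib import Path
from app.utils.rules_engine import RuleEngine, load_rules_from_yaml

engine = RuleEngine()
for rule in load_rules_from_yaml(Path("app/config/suspicious_rules.yaml")):
    engine.add_rule(rule)

for result in engine.run_all("document.write('<iframe src=x>')", category="script"):
    if result["matched"]:
        print(result["rule"], "-", result["reason"])
```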

144
app/utils/settings.py Normal file

@@ -0,0 +1,144 @@
#
# Note: the default settings file path is hardcoded below, just after the imports.
#
# To add a new settings section, add the corresponding mapping to your YAML and
# define a matching dataclass in the config data classes area below (a hypothetical
# example is sketched at the end of this file).
#
# Example use from anywhere - this will always return the same singleton
# from settings import get_settings
# def main():
# settings = get_settings()
# print(settings.database.host) # Autocomplete works
# print(settings.logging.level)
# if __name__ == "__main__":
# main()
import functools
from pathlib import Path
from typing import Any, Callable, TypeVar
from dataclasses import dataclass, fields, is_dataclass, field, MISSING
import logging
import sys
logger = logging.getLogger(__file__)
try:
import yaml
except ModuleNotFoundError:
msg = (
"Required modules are not installed. "
"Cannot continue with module / application loading.\n"
"Install them with: pip install -r requirements.txt"
)
print(msg, file=sys.stderr)
logger.error(msg)
sys.exit(1)
BASE_DIR = Path(__file__).resolve().parent.parent
DEFAULT_SETTINGS_FILE = BASE_DIR / "config" / "settings.yaml"
# ---------- CONFIG DATA CLASSES ----------
@dataclass
class Cache_Config:
whois_cache_days: int = 7
geoip_cache_days: int = 7
recent_runs_count: int = 10
@dataclass
class AppConfig:
name: str = "MyApp"
version_major: int = 1
version_minor: int = 0
@dataclass
class Settings:
cache: Cache_Config = field(default_factory=Cache_Config)
app: AppConfig = field(default_factory=AppConfig)
@classmethod
def from_yaml(cls, path: str | Path) -> "Settings":
"""Load settings from a YAML file into a Settings object."""
try:
with open(path, "r", encoding="utf-8") as f:
raw: dict[str, Any] = yaml.safe_load(f) or {}
except FileNotFoundError:
logger.warning(f"Settings file {path} not found! Using default settings.")
raw = {}
init_kwargs = {}
for f_def in fields(cls):
yaml_value = raw.get(f_def.name, None)
# Determine default value from default_factory or default
if f_def.default_factory is not MISSING:
default_value = f_def.default_factory()
elif f_def.default is not MISSING:
default_value = f_def.default
else:
default_value = None
# Handle nested dataclasses
if is_dataclass(f_def.type):
if isinstance(yaml_value, dict):
# Merge YAML values with defaults
merged_data = {fld.name: getattr(default_value, fld.name) for fld in fields(f_def.type)}
merged_data.update(yaml_value)
init_kwargs[f_def.name] = f_def.type(**merged_data)
else:
init_kwargs[f_def.name] = default_value
else:
init_kwargs[f_def.name] = yaml_value if yaml_value is not None else default_value
return cls(**init_kwargs)
# ---------- SINGLETON DECORATOR ----------
T = TypeVar("T")
def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
"""Ensure the function only runs once, returning the cached value."""
cache: dict[str, T] = {}
@functools.wraps(func)
def wrapper(*args, **kwargs) -> T:
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
@singleton_loader
def get_settings(config_path: str | Path | None = None) -> Settings:
"""
Returns the singleton Settings instance.
Args:
config_path: Optional path to the YAML config file. If not provided,
defaults to 'config/settings.yaml' in the current working directory.
"""
if config_path is None:
config_path = DEFAULT_SETTINGS_FILE
else:
config_path = Path(config_path)
return Settings.from_yaml(config_path)
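# Note: because of @singleton_loader, only the first call actually reads the YAML
# file; later calls return the cached Settings and ignore config_path.
# Illustrative sketch:
#     settings = get_settings()          # loads config/settings.yaml
#     same = get_settings("other.yaml")  # path ignored, cached object returned
#     assert settings is same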

10
app/wsgi.py Normal file
View File

@@ -0,0 +1,10 @@
"""
app/wsgi.py
Gunicorn entrypoint for SneakyScope.
"""
from . import create_app
# Gunicorn will look for "app"
app = create_app()
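# Started in production via entrypoint.sh, e.g.:
#   gunicorn --bind 0.0.0.0:8000 --worker-class gthread "app.wsgi:app"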

13
docker-compose.yaml Normal file
View File

@@ -0,0 +1,13 @@
services:
web:
build: .
container_name: url-sandbox-web
ports:
- "8000:8000"
env_file:
- .env
volumes:
- ./data:/data
security_opt:
- no-new-privileges:true
restart: unless-stopped

71
docs/roadmap.md Normal file
View File

@@ -0,0 +1,71 @@
## Priority 1: Core Functionality / Stability
**Permissions / Storage Paths**
* ✅ `/data` and other mounted volume paths are set up by `sandbox.sh`
* ✅ Downloads, screenshots, and HTML artifacts are written correctly (`safe_write` in `io_helpers.py`)
---
## Priority 2: Data Accuracy / Enrichment
**WHOIS & GeoIP Enhancements**
* ✅ Implemented Python-based WHOIS parsing with fallback to raw WHOIS text
* ✅ Default `"Possible Privacy"` or `"N/A"` for missing WHOIS fields
* ✅ GeoIP + ASN + ISP info displayed per IP in **accordion tables**
* ✅ Cache WHOIS and GeoIP results to reduce repeated queries
**Suspicious Scripts & Forms**
* [ ] Expand flagged script and form output with reasons for analysts
* [ ] Show each check and whether it triggered a flag (pass/fail per check)
**Add Suspicious BEC words**
* ✅ Look for things like `"reset password"`
* ✅ Make configurable via a config file (yaml doc with rules)
---
## Priority 3: User Interface / UX
**Front Page / Input Handling**
* [ ] Automatically prepend `http://`, `https://`, and/or `www.` if a user only enters a domain
**Result Templates / Cards**
* [ ] Load the page source code in a code-editor view or code block so it's easier to read
* [ ] Update result cards with clear, analyst-friendly explanations
* [ ] Include flagged logic and reason lists for scripts and forms
* ✅ Display GeoIP results in accordion tables
---
## Priority 4: API Layer
**API Endpoints**
* [ ] Add `/screenshot` endpoint
* [ ] Add `/source` endpoint
* [ ] Add `/analyse` endpoint
**OpenAPI + Docs**
* [ ] Create initial `openapi/openapi.yaml` spec file
* [ ] Serve spec at `/api/openapi.yaml`
* [ ] Wire up Swagger UI or Redoc at `/docs` for interactive API exploration
---
## Priority 5: Optional / Cleanup
**Artifact Management**
* [ ] Save results for a run UUID as "results.json" so repeat views load from cache instead of rerunning all the rules
* [ ] Implement cleanup or retention policy for old artifacts
* [ ] Optional: Add periodic maintenance scripts for storage
**Extra Features**
* [ ] Placeholder for additional features (e.g., bulk URL analysis, alerting, integrations)

22
entrypoint.sh Normal file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euo pipefail
# Sanity check: make sure the Playwright package is importable
# (the base image already ships the browsers, so this should be a no-op)
python - <<'PY'
# no-op import: fails fast with a clear error if playwright is missing
import playwright  # noqa: F401
PY
# Run the app via gunicorn:
#   --graceful-timeout 300   ensures long page loads aren't killed prematurely
#   --threads 8              gives each worker more threads to work with
#   --worker-class gthread   lets each worker handle multiple threads, so blocking tasks like Playwright won't stall the whole worker
exec gunicorn \
--bind 0.0.0.0:8000 \
--workers 2 \
--threads 8 \
--worker-class gthread \
--timeout 300 \
--graceful-timeout 300 \
"app.wsgi:app"

94
openapi/openapi.yaml Normal file
View File

@@ -0,0 +1,94 @@
openapi: 3.0.3
info:
title: URL Sandbox API
version: 0.1.0
description: API for analyzing and extracting website artifacts.
servers:
  - url: http://localhost:8000/api
description: Local development
paths:
/screenshot:
post:
summary: Capture a screenshot of a website
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- url
properties:
url:
type: string
example: "http://example.com"
responses:
'200':
description: Screenshot image returned
content:
image/png: {}
'400':
description: Invalid request
/source:
post:
summary: Retrieve HTML source of a website
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- url
properties:
url:
type: string
example: "http://example.com"
responses:
'200':
description: Raw HTML source
content:
text/html:
schema:
type: string
'400':
description: Invalid request
/analyse:
post:
summary: Run full analysis on a website
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- url
properties:
url:
type: string
example: "http://example.com"
responses:
'200':
description: JSON with enrichment and analysis results
content:
application/json:
schema:
type: object
properties:
url:
type: string
whois:
type: object
geoip:
type: object
flags:
type: array
items:
type: string
'400':
description: Invalid request

14
requirements.txt Normal file
View File

@@ -0,0 +1,14 @@
Flask>=3.0.3
Jinja2>=3.1.4
Werkzeug>=3.0.3
itsdangerous>=2.2.0
click>=8.1.7
lxml>=5.3.0
playwright==1.45.0 # Playwright stack
beautifulsoup4>=4.12.3 # HTML parsing, etc.
gunicorn>=22.0.0 # Production server
python-whois # For WHOIS lookups
geoip2 # MaxMind GeoIP2/GeoLite2 database reader for IP geolocation
dnspython # For DNS lookups, including A/AAAA records
ipwhois # IP/ASN and network ownership lookups
PyYAML # YAML parsing for settings and rules config files

101
sandbox.sh Executable file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail
# --- CONFIG ---
SANDBOX_STORAGE="${SANDBOX_STORAGE:-./data}"
APP_URL="${APP_URL:-http://localhost:8000}"
# --- FUNCTIONS ---
prepare_storage() {
echo "[*] Checking storage path: $SANDBOX_STORAGE"
if [ ! -d "$SANDBOX_STORAGE" ]; then
echo " -> Creating $SANDBOX_STORAGE on host"
sudo mkdir -p "$SANDBOX_STORAGE"
fi
echo " -> Setting ownership to Playwright user (pwuser / UID 1000)"
sudo chown -R 1000:1000 "$SANDBOX_STORAGE"
sudo chmod -R 755 "$SANDBOX_STORAGE"
echo "[+] Storage ready."
}
start_stack() {
prepare_storage
echo "[*] Building Docker image..."
docker compose build
if [[ "${1:-}" == "-d" ]]; then
echo "[*] Starting services in detached mode..."
docker compose up -d
else
echo "[*] Starting services (attached)..."
docker compose up
fi
}
stop_stack() {
echo "[*] Stopping services..."
docker compose down
}
clean_stack() {
echo "[*] Removing containers, networks, and volumes..."
docker compose down -v --remove-orphans
}
restart_stack() {
stop_stack
echo "[*] Restarting services..."
start_stack -d
}
logs_stack() {
echo "[*] Showing logs (Ctrl+C to exit)..."
docker compose logs -f
}
status_stack() {
echo "[*] Current service status:"
docker compose ps
}
healthcheck_stack() {
echo "[*] Running health check on $APP_URL ..."
if curl -fsS "$APP_URL" > /dev/null; then
echo "[+] Service is healthy and reachable."
else
echo "[!] Service is NOT reachable at $APP_URL"
exit 1
fi
}
# --- MAIN ---
case "${1:-}" in
start)
shift
start_stack "$@"
;;
stop)
stop_stack
;;
restart)
restart_stack
;;
clean)
clean_stack
;;
logs)
logs_stack
;;
status)
status_stack
;;
healthcheck)
healthcheck_stack
;;
*)
echo "Usage: $0 {start [-d for detached mode] | stop | restart | clean | logs | status | healthcheck}"
exit 1
;;
esac