first commit

2025-08-20 21:22:28 +00:00
commit 70d29f9f95
26 changed files with 2558 additions and 0 deletions
--- a/app/utils/cache_db.py
+++ b/app/utils/cache_db.py
@@ -0,0 +1,128 @@
+import json
+import time
+import sqlite3
+import threading
+import functools
+from pathlib import Path
+from typing import Any, Optional
+
+
+# ---------- SINGLETON DECORATOR ----------
+T = Any  # alias for "whatever the decorated factory returns"
+
+def singleton_loader(func):
+    """Wrap ``func`` so it runs once; later calls get that first result and their args are ignored."""
+    instances: dict[str, T] = {}
+    guard = threading.Lock()
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> T:
+        with guard:  # only one thread at a time may check for / build the instance
+            key = func.__name__
+            if key not in instances:
+                instances[key] = func(*args, **kwargs)
+            return instances[key]
+    return wrapper
+
+# ---------- CACHE CLASS ----------
+class CacheDB:
+    """SQLite-backed cache with expiration in minutes, CRUD and auto-cleanup.
+
+    Values are stored as JSON text; every statement runs while holding ``self._lock``.
+    """
+
+    TABLE_NAME = "cache"
+
+    def __init__(self, db_path: str | Path = "cache.db", default_expiration_minutes: int = 1440):
+        """
+        :param db_path: path of the SQLite database file
+        :param default_expiration_minutes: default expiration in minutes (default 24 hours)
+        """
+        self.db_path = Path(db_path)
+        self.default_expiration = default_expiration_minutes * 60  # convert minutes -> seconds
+
+        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
+        self.conn.row_factory = sqlite3.Row
+        self._lock = threading.Lock()
+        self._create_table()
+
+    def _create_table(self):
+        """Create the cache table if it doesn't exist."""
+        with self._lock, self.conn:  # the connection context commits, or rolls back on error
+            self.conn.execute(f"""
+                CREATE TABLE IF NOT EXISTS {self.TABLE_NAME} (
+                    key TEXT PRIMARY KEY,
+                    value TEXT,
+                    expires_at INTEGER
+                )
+            """)
+
+    def _expires_at(self, expires_in_minutes: Optional[int]) -> int:
+        """Return the absolute expiry in epoch seconds; ``None`` selects the default lifetime."""
+        if expires_in_minutes is None:
+            return int(time.time()) + self.default_expiration
+        return int(time.time()) + expires_in_minutes * 60
+
+    def _cleanup_expired(self):
+        """Delete expired rows."""
+        now = int(time.time())
+        with self._lock, self.conn:
+            self.conn.execute(
+                f"DELETE FROM {self.TABLE_NAME} WHERE expires_at IS NOT NULL AND expires_at < ?", (now,)
+            )
+
+    # ---------- CRUD ----------
+    def create(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
+        """Insert or update a cache entry. expires_in_minutes overrides default expiration."""
+        self._cleanup_expired()
+        expires_at = self._expires_at(expires_in_minutes)
+        value_json = json.dumps(value)
+        with self._lock, self.conn:
+            self.conn.execute(
+                f"INSERT OR REPLACE INTO {self.TABLE_NAME} (key, value, expires_at) VALUES (?, ?, ?)",
+                (key, value_json, expires_at)
+            )
+
+    def read(self, key: str) -> Optional[Any]:
+        """Return the decoded value for ``key``, or ``None`` when it is missing or expired.
+
+        Expired rows are purged first; a stored JSON ``null`` also reads back as ``None``.
+        """
+        self._cleanup_expired()
+        with self._lock:
+            # Filter on expiry here too, so the answer does not depend on the purge above.
+            row = self.conn.execute(
+                f"SELECT value FROM {self.TABLE_NAME} WHERE key = ?"
+                " AND (expires_at IS NULL OR expires_at >= ?)", (key, int(time.time())),
+            ).fetchone()
+        return None if row is None else json.loads(row["value"])
+
+    def update(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
+        """Replace value and expiry of an existing entry; an unknown key changes nothing."""
+        expires_at = self._expires_at(expires_in_minutes)
+        value_json = json.dumps(value)
+        with self._lock, self.conn:
+            self.conn.execute(
+                f"UPDATE {self.TABLE_NAME} SET value = ?, expires_at = ? WHERE key = ?",
+                (value_json, expires_at, key)
+            )
+
+    def delete(self, key: str):
+        """Remove the entry stored under ``key``, if there is one."""
+        with self._lock, self.conn:
+            self.conn.execute(f"DELETE FROM {self.TABLE_NAME} WHERE key = ?", (key,))
+
+    def clear(self):
+        """Delete all rows from the cache table."""
+        with self._lock, self.conn:
+            self.conn.execute(f"DELETE FROM {self.TABLE_NAME}")
+
+    def close(self):
+        """Close the connection once no other thread is in the middle of a statement."""
+        with self._lock:
+            self.conn.close()
+
+
+# ---------- SINGLETON INSTANCE (only the first call's arguments take effect) ----------
+@singleton_loader
+def get_cache(db_path: str = "cache.db", default_expiration_minutes: int = 1440) -> CacheDB:
+    return CacheDB(db_path=db_path, default_expiration_minutes=default_expiration_minutes)
--- a/app/utils/io_helpers.py
+++ b/app/utils/io_helpers.py
@@ -0,0 +1,115 @@
+import json
+import logging
+from pathlib import Path
+from datetime import datetime
+
+logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")  # NOTE(review): runs at import time and configures the root logger
+
+def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
+    """Write content to a file safely with logging: parent dirs are created, failures re-raised."""
+    path = Path(path)
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, mode, encoding=None if "b" in mode else encoding) as f:  # binary modes take no encoding
+            f.write(content)
+        logging.info(f"[+] Wrote file: {path}")
+    except Exception as e:
+        logging.error(f"[!] Failed writing {path}: {e}")
+        raise
+
+def _read_run_summary(run_dir: Path, logger) -> tuple[float, dict] | None:
+    """Summarise one run folder as ``(mtime_epoch, item)``, or ``None`` if it is not a run.
+
+    A folder counts as a run only when it is a directory containing results.json.
+    An unreadable or malformed results.json is logged and still yields an item,
+    with both URL fields left as ``None``.
+
+    Args:
+        run_dir: Candidate entry found directly under the storage directory.
+        logger: Optional logger for the non-fatal read failure.
+    """
+    if not run_dir.is_dir():
+        return None
+
+    results_path = run_dir / "results.json"
+    if not results_path.exists():
+        return None
+
+    # File mtime drives both the ordering and the displayed timestamp.
+    mtime_epoch = results_path.stat().st_mtime
+    item = {
+        "uuid": run_dir.name,
+        "submitted_url": None,
+        "final_url": None,
+        # fromtimestamp() without a tz gives naive local time.
+        "timestamp": datetime.fromtimestamp(mtime_epoch).isoformat(timespec="seconds"),
+    }
+
+    try:
+        with open(results_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if isinstance(data, dict):
+            item["submitted_url"] = data.get("submitted_url")
+            item["final_url"] = data.get("final_url")
+    except Exception as read_err:
+        # If JSON is malformed or unreadable, log and keep the None placeholders
+        if logger:
+            logger.warning(f"[recent] Failed reading {results_path}: {read_err}")
+
+    return mtime_epoch, item
+
+
+def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
+    """
+    Scan the SANDBOX_STORAGE directory for run folders (UUIDs), read each
+    run's results.json, and return the most recent N entries by file mtime.
+
+    Folders without a results.json are ignored. A results.json that cannot be
+    parsed still produces an entry, with both URL fields set to None.
+
+    Args:
+        storage_dir (Path | str): Base path where UUID run directories live.
+            It is created if it does not exist yet.
+        limit (int): Maximum number of recent items to return; zero or a
+            negative value yields an empty list.
+        logger: Flask or stdlib logger to record non-fatal issues (may be None).
+
+    Returns:
+        list[dict]: Newest first. Each item includes:
+            {
+              "uuid": str,
+              "submitted_url": str | None,
+              "final_url": str | None,
+              "timestamp": str (ISO 8601),
+            }
+        Returns an empty list if no runs are found or on error.
+    """
+    try:
+        # Accept plain strings as well as Path objects.
+        storage_dir = Path(storage_dir)
+        storage_dir.mkdir(parents=True, exist_ok=True)
+
+        # (mtime_epoch, item) pairs for every run folder found
+        found = []
+        for entry in storage_dir.iterdir():
+            try:
+                summary = _read_run_summary(entry, logger)
+            except Exception as inner_err:
+                # Keep going; a single bad folder should not break the list
+                if logger:
+                    logger.warning(f"[recent] Skipping {entry}: {inner_err}")
+                continue
+            if summary is not None:
+                found.append(summary)
+
+        # Newest first. Sorting on the mtime alone keeps the dicts out of the comparison.
+        found.sort(key=lambda pair: pair[0], reverse=True)
+
+        # max() guards the slice: a negative limit must not mean "all but the last N".
+        return [item for _, item in found[: max(limit, 0)]]
+
+    except Exception as outer_err:
+        # Best-effort listing: a storage problem is logged, never raised to the caller.
+        if logger:
+            logger.error(f"[recent] Unexpected error while scanning {storage_dir}: {outer_err}")
+        return []
--- a/app/utils/rules_engine.py
+++ b/app/utils/rules_engine.py
@@ -0,0 +1,132 @@
+"""
+rules_engine.py
+
+A flexible rule-based engine for detecting suspicious patterns in scripts, forms,
+or other web artifacts inside SneakyScope. 
+
+Each rule is defined as:
+    - name: str            # Rule identifier
+    - description: str     # Human-readable reason for analysts
+    - category: str        # e.g., 'script', 'form', 'text', 'generic'
+    - type: str            # 'regex' or 'function'
+    - pattern: str         # Regex pattern (if type=regex)
+    - function: callable   # Python function returning (bool, str) (if type=function)
+
+The framework returns a list of results, with pass/fail and reasoning.
+"""
+
+import re
+from pathlib import Path
+from typing import Callable, Dict, List, Tuple, Union
+
+import yaml
+
+
+class Rule:
+    """One detection rule: a regex pattern or a Python predicate, plus descriptive metadata."""
+
+    def __init__(
+        self,
+        name: str,
+        description: str,
+        category: str,
+        rule_type: str = "regex",
+        pattern: str = None,
+        function: Callable = None,
+    ):
+        # Descriptive metadata.
+        self.name, self.description, self.category = name, description, category
+        # rule_type selects the check: "regex" uses `pattern`, "function" uses `function`.
+        self.rule_type = rule_type
+        self.pattern = pattern
+        self.function = function
+
+    def run(self, text: str) -> Tuple[bool, str]:
+        """
+        Evaluate this rule against ``text`` (regex rules match case-insensitively).
+
+        Returns:
+            (matched: bool, reason: str); a function rule's return value is passed through as-is.
+        """
+        if self.rule_type == "regex" and self.pattern:
+            hit = re.search(self.pattern, text, re.IGNORECASE)
+            if hit:
+                return True, f"Matched regex '{self.pattern}' → {self.description}"
+            return False, "No match"
+        if self.rule_type == "function" and callable(self.function):
+            return self.function(text)
+        # Unknown rule_type, or the pattern/callable that the type needs is missing.
+        return False, "Invalid rule configuration"
+
+
+class RuleEngine:
+    """Holds a list of rules and evaluates them against a piece of text."""
+
+    def __init__(self, rules: List[Rule] = None):
+        # A falsy argument (None or an empty list) gets a fresh list.
+        self.rules = rules if rules else []
+
+    def add_rule(self, rule: Rule):
+        """Register an additional rule; it runs after the ones already present."""
+        self.rules.append(rule)
+
+    def run_all(self, text: str, category: str = None) -> List[Dict]:
+        """
+        Evaluate the rules against ``text``, in insertion order.
+
+        Args:
+            text: str → the content to test
+            category: str → optional; when truthy, rules of other categories are skipped
+
+        Returns:
+            One dict per evaluated rule with the keys "rule", "category",
+            "matched" and "reason" ("reason" is None unless the rule matched).
+        """
+        results: List[Dict] = []
+        for rule in self.rules:
+            # No filter requested, or the rule belongs to the requested category.
+            if not category or rule.category == category:
+                matched, reason = rule.run(text)
+                verdict = {
+                    "rule": rule.name,
+                    "category": rule.category,
+                    "matched": matched,
+                    "reason": reason if matched else None,
+                }
+                results.append(verdict)
+        return results
+
+
+def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
+    """
+    Load rules from a YAML file whose top level is a list of mappings.
+
+    An empty file yields an empty list; a missing ``name``, ``description`` or
+    ``category`` raises KeyError. No callable is attached to ``type: function`` entries.
+
+    Example YAML format:
+        - name: suspicious_eval
+          description: "Use of eval() in script"
+          category: script
+          type: regex
+          pattern: "\\beval\\("
+
+        - name: password_reset
+          description: "Password reset wording"
+          category: text
+          type: regex
+          pattern: "reset password"
+    """
+    with open(yaml_file, "r", encoding="utf-8") as f:
+        data = yaml.safe_load(f) or []  # an empty document loads as None
+
+    return [
+        Rule(
+            name=item["name"],
+            description=item["description"],
+            category=item["category"],
+            rule_type=item.get("type", "regex"),
+            pattern=item.get("pattern"),
+        )
+        for item in data
+    ]
--- a/app/utils/settings.py
+++ b/app/utils/settings.py
@@ -0,0 +1,144 @@
+#
+# Note: the default settings file path is hardcoded in this module, just after the imports.
+#
+# To add a new settings section, add the section's dict to your YAML file,
+# define its dataclass below in the config data classes area, and add a field for it to Settings.
+#
+# Example use from anywhere - this will always return the same singleton:
+# from settings import get_settings
+# def main():
+#     settings = get_settings()
+#     print(settings.cache.whois_cache_days)  # Autocomplete works
+#     print(settings.app.name)
+#
+# if __name__ == "__main__":
+#     main()
+
+import functools
+from pathlib import Path
+from typing import Any, Callable, TypeVar
+from dataclasses import dataclass, fields, is_dataclass, field, MISSING
+
+import logging
+import sys
+logger = logging.getLogger(__name__)  # named after the module, not after its file path
+
+try:  # PyYAML is a hard requirement of this module; without it, fail fast
+    import yaml
+except ModuleNotFoundError:
+    msg = (
+        "Required modules are not installed. "
+        "Can not continue with module / application loading.\n"
+        "Install it with: pip install -r requirements"
+    )
+    print(msg, file=sys.stderr)
+    logger.error(msg)
+    sys.exit(1)  # non-zero status; bare exit() is a site-module helper and signals success
+
+BASE_DIR = Path(__file__).resolve().parent.parent  # the directory above this file's own directory
+DEFAULT_SETTINGS_FILE = BASE_DIR / "config" / "settings.yaml"
+
+# ---------- CONFIG DATA CLASSES ----------
+@dataclass
+class Cache_Config:  # values for the "cache" section of the settings YAML (defaults used when absent)
+    whois_cache_days: int = 7  # presumably the WHOIS cache lifetime in days — consumer not visible here
+    geoip_cache_days: int = 7  # presumably the GeoIP cache lifetime in days — consumer not visible here
+    recent_runs_count: int = 10  # presumably how many recent runs to list — TODO confirm against caller
+
+
+@dataclass
+class AppConfig:  # values for the "app" section of the settings YAML (defaults used when absent)
+    name: str = "MyApp"  # placeholder application name unless the YAML overrides it
+    version_major: int = 1  # major component of the application version
+    version_minor: int = 0  # minor component of the application version
+
+
+@dataclass
+class Settings:
+    """Top-level settings object; each field maps to the same-named section of the YAML file."""
+
+    cache: Cache_Config = field(default_factory=Cache_Config)
+    app: AppConfig = field(default_factory=AppConfig)
+
+    @classmethod
+    def from_yaml(cls, path: str | Path) -> "Settings":
+        """Load settings from YAML file into a Settings object.
+
+        A missing file, section or key falls back to the dataclass defaults; keys that a
+        section's dataclass does not declare are skipped with a warning.
+        """
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                raw: dict[str, Any] = yaml.safe_load(f) or {}
+        except FileNotFoundError:
+            logger.warning(f"Settings file {path} not found! Using default settings.")
+            raw = {}
+
+        init_kwargs = {}
+        for f_def in fields(cls):
+            yaml_value = raw.get(f_def.name)
+            if f_def.default_factory is not MISSING:
+                default_value = f_def.default_factory()
+            else:
+                default_value = None if f_def.default is MISSING else f_def.default
+            if is_dataclass(f_def.type) and isinstance(yaml_value, dict):
+                # Nested section: start from the defaults, overlay the keys the YAML provides.
+                known = {fld.name for fld in fields(f_def.type)}
+                for key in yaml_value.keys() - known:
+                    logger.warning(f"Ignoring unknown setting '{f_def.name}.{key}' in {path}")
+                merged = {name: yaml_value.get(name, getattr(default_value, name)) for name in known}
+                init_kwargs[f_def.name] = f_def.type(**merged)
+            else:  # plain field, or a section the YAML omits / gives a non-dict value for
+                keep_default = is_dataclass(f_def.type) or yaml_value is None
+                init_kwargs[f_def.name] = default_value if keep_default else yaml_value
+
+        return cls(**init_kwargs)
+
+
+# ---------- SINGLETON DECORATOR ----------
+T = TypeVar("T")
+
+
+def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator to ensure the settings are loaded only once.
+
+    The wrapped function runs on the first call and its return value is
+    cached; every later call returns that same object.
+
+    Note:
+        Arguments are only used by the first call. A later call that passes
+        different arguments (for example another ``config_path``) still gets
+        the value produced by the first call.
+
+        The check-then-store below is not guarded by a lock, so two threads
+        racing on the very first call could each run ``func``.
+
+    Returns:
+        A wrapper carrying the metadata of ``func`` (via functools.wraps).
+    """
+    cache: dict[str, T] = {}
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> T:
+        if func.__name__ not in cache:
+            cache[func.__name__] = func(*args, **kwargs)
+        return cache[func.__name__]
+
+    return wrapper
+
+
+@singleton_loader
+def get_settings(config_path: str | Path | None = None) -> Settings:
+    """
+    Returns the singleton Settings instance.
+
+    Args:
+        config_path: Optional path to the YAML config file; DEFAULT_SETTINGS_FILE is used
+                     when omitted. Only the first call's value matters (singleton_loader caches).
+    """
+    if config_path is None:
+        config_path = DEFAULT_SETTINGS_FILE
+    else:
+        config_path = Path(config_path)
+
+    return Settings.from_yaml(config_path)