# SneakyScope/app/utils/rules_engine.py

"""
rules_engine.py

A flexible rule-based engine for detecting suspicious patterns in scripts, forms,
or other web artifacts inside SneakyScope.

Each rule is defined as:
    - name: str            # Rule identifier
    - description: str     # Human-readable reason for analysts
    - category: str        # e.g., 'script', 'form', 'text', 'generic'
    - type: str            # 'regex' or 'function'
    - pattern: str         # Regex pattern (if type=regex)
    - function: callable   # Python function returning (bool, str) (if type=function)

The framework returns a list of results, with pass/fail and reasoning.
"""

import re
from pathlib import Path
from typing import Callable, Dict, List, Tuple, Union

import yaml


class Rule:
    """A single named detection check, backed by either a regex or a callable."""

    def __init__(
        self,
        name: str,
        description: str,
        category: str,
        rule_type: str = "regex",
        pattern: str = None,
        function: Callable = None,
    ):
        # Identity and analyst-facing metadata.
        self.name, self.description, self.category = name, description, category
        # Matching strategy: `pattern` is used by "regex" rules, `function`
        # by "function" rules.
        self.rule_type, self.pattern, self.function = rule_type, pattern, function

    def run(self, text: str) -> Tuple[bool, str]:
        """
        Evaluate this rule against ``text``.

        Regex rules are searched case-insensitively anywhere in the text.
        Function rules hand the text to the stored callable and return
        whatever it returns.

        Returns:
            (matched: bool, reason: str)
        """
        # Regex rule with a non-empty pattern.
        if self.rule_type == "regex" and self.pattern:
            found = re.search(self.pattern, text, re.IGNORECASE)
            if found is None:
                return False, "No match"
            return True, f"Matched regex '{self.pattern}' → {self.description}"

        # Function rule with something callable attached.
        if self.rule_type == "function" and callable(self.function):
            return self.function(text)

        # Unknown type, or the data required by the declared type is missing.
        return False, "Invalid rule configuration"


class RuleEngine:
    """Holds a collection of rules and evaluates them against text."""

    def __init__(self, rules: List[Rule] = None):
        # A falsy argument (None or an empty list) yields a fresh list.
        self.rules = rules if rules else []

    def add_rule(self, rule: Rule):
        """Register an additional rule after construction."""
        self.rules.append(rule)

    def run_all(self, text: str, category: str = None) -> List[Dict]:
        """
        Evaluate the registered rules against ``text``.

        Args:
            text: str → the content to test
            category: str → optional; when given, rules from any other
                category are skipped

        Returns:
            One dict per evaluated rule with the keys "rule", "category",
            "matched" and "reason". "reason" is None unless the rule matched.
        """
        # Lazy filter: with no category every rule is selected.
        selected = (
            candidate
            for candidate in self.rules
            if not category or candidate.category == category
        )

        report = []
        for current in selected:
            hit, why = current.run(text)
            entry = {
                "rule": current.name,
                "category": current.category,
                "matched": hit,
                "reason": None,
            }
            # Only matches carry an explanation.
            if hit:
                entry["reason"] = why
            report.append(entry)
        return report


def load_rules_from_yaml(yaml_file: Union[str, Path]) -> List[Rule]:
    r"""
    Load rules from a YAML file.

    The document root must be a list of mappings. Each mapping requires the
    keys ``name``, ``description`` and ``category``; ``type`` defaults to
    ``regex`` and ``pattern`` is optional. No callable is attached to loaded
    rules, so an entry declared as ``type: function`` will not match until a
    ``function`` is assigned to the resulting Rule in code.

    Example YAML format (single-quoted scalars keep regex backslashes
    literal; inside double quotes YAML would treat them as escapes):
        - name: suspicious_eval
          description: "Use of eval() in script"
          category: script
          type: regex
          pattern: '\beval\('

        - name: password_reset
          description: "Password reset wording"
          category: text
          type: regex
          pattern: 'reset password'

    Args:
        yaml_file: Path of the YAML rules file.

    Returns:
        The rules in file order. An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        TypeError: If the document root is not a list, or an entry is not a
            mapping.
        KeyError: If an entry lacks ``name``, ``description`` or ``category``.
    """
    with open(yaml_file, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # An empty (or comment-only) file parses to None: that means "no rules",
    # not an error.
    if data is None:
        return []

    if not isinstance(data, list):
        raise TypeError(
            f"Rules file {yaml_file} must contain a list of rules at the top "
            f"level, got {type(data).__name__}"
        )

    rules = []
    for index, item in enumerate(data):
        # Fail with a pointer to the offending entry instead of an opaque
        # subscript error further down.
        if not isinstance(item, dict):
            raise TypeError(
                f"Rule #{index} in {yaml_file} must be a mapping, "
                f"got {type(item).__name__}"
            )
        rules.append(
            Rule(
                name=item["name"],
                description=item["description"],
                category=item["category"],
                rule_type=item.get("type", "regex"),
                pattern=item.get("pattern"),
            )
        )

    return rules