feat(text): add text analysis pipeline & surface results in UI

- engine: add analyse_text() to extract visible page text and evaluate
  category="text" rules; collect matched phrases and expose as
  `content_snippet` (deduped, length-capped via settings.ui.snippet_preview_len).
- engine: remove unused code
- browser: remove duplicate enrichment call
- engine: improve regex compilation — honor per-rule flags (string or list)
  and default IGNORECASE when category=="text".
- engine: add dispatch logging "[engine] applying categories: …" gated by
  settings.app.log_rule_dispatch.
- ui(templates): add `templates/partials/result_text.html` mirroring the forms
  table; renders page-level records and their matched rules.
- ui(controller): wire `analyse_text()` into scan path and expose
  `payload["suspicious_text"]`.
- rules(text): add `identity_verification_prompt`, `gated_document_access`,
  `email_collection_prompt`; broaden `credential_reset`.

fix: text indicators were not displayed due to missing analyzer and mismatched result shape.

Result shape:
  suspicious_text: [
    {
      "type": "page",
      "content_snippet": "...matched phrases…",
      "rules": [
        {"name": "...", "description": "...", "severity": "medium", "tags": ["..."]}
      ]
    }
  ]
This commit is contained in:
2025-08-22 17:18:50 -05:00
parent af253c858c
commit 55cd81aec0
13 changed files with 422 additions and 115 deletions

View File

@@ -2,7 +2,15 @@ app:
name: SneakyScope
version_major: 0
version_minor: 1
print_rule_loads: True
# logs when rules are loaded
log_rule_loads: False
# logs each rule category as it is run
log_rule_dispatch: False
# logs rule pass/fail per rule
log_rule_debug: False
cache:
recent_runs_count: 10

View File

@@ -96,39 +96,49 @@
severity: high
tags: [credentials, form]
# --- Text Rules (Social Engineering / BEC) ---
- name: urgent_request
description: "Language suggesting urgency (common in phishing/BEC)"
category: text
type: regex
pattern: '\b(urgent|immediately|asap|action\s*required|verify\s*now)\b'
severity: medium
tags: [bec, urgency]
# --- Text Rules (Social Engineering / BEC / Lures) ---
- name: account_suspension
description: "Threat of account suspension/closure"
- name: identity_verification_prompt
description: "Prompts to verify identity/account/email, often gating access"
category: text
type: regex
pattern: '\b(account\s*(suspend|closure|close)|verify\s*account)\b'
# e.g., "verify your identity", "confirm your email", "validate account"
pattern: '\b(verify|confirm|validate)\s+(?:your\s+)?(identity|account|email)\b'
flags: [i]
severity: medium
tags: [bec, scare-tactics]
tags: [bec, verification, gating]
- name: financial_request
description: "Request for gift cards, wire transfer, or money"
- name: gated_document_access
description: "Language gating document access behind an action"
category: text
type: regex
pattern: '\b(gift\s*card|wire\s*transfer|bank\s*account|bitcoin|crypto|payment\s*required)\b'
severity: high
tags: [bec, financial]
# e.g., "secure document", "confidential document", "access your document", "unlock document"
pattern: '(secure|confidential)\s+document|access\s+(?:the|your)?\s*document|unlock\s+document'
flags: [i]
severity: medium
tags: [lure, document]
- name: email_collection_prompt
  description: "Explicit prompt to enter/provide an email address to proceed"
  category: text
  type: regex
  # e.g., "enter your email address", "provide email", "use your email to continue".
  # The word "address" is optional together with its leading whitespace, so the
  # rule also matches when "email" is followed directly by punctuation or the
  # end of the text (e.g., "provide email.").
  pattern: '\b(enter|provide|use)\s+(?:your\s+)?email(?:\s+address)?\b'
  flags: [i]
  severity: low
  tags: [data-collection, email]
- name: credential_reset
description: "Password reset or credential reset wording"
description: "Password/credential reset or login-to-continue wording"
category: text
type: regex
pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*verify|password\s*expiry)\b'
# includes: reset password, update credentials, log in to (verify|view|access), password expiry/expiration/expires
pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*(?:verify|view|access)|password\s*(?:expiry|expiration|expires))\b'
flags: [i]
severity: medium
tags: [bec, credentials]
- name: suspicious_iframe
description: "Iframe tag present (possible phishing/malvertising/drive-by)"
category: text