feat(text): add text analysis pipeline & surface results in UI

- engine: add analyse_text() to extract visible page text and evaluate category="text" rules; collect matched phrases and expose as `content_snippet` (deduped, length-capped via settings.ui.snippet_preview_len).
- engine: removed unused code
- browser: removed double call for enrichment
- engine: improve regex compilation — honor per-rule flags (string or list) and default IGNORECASE when category=="text".
- engine: add dispatch logging "[engine] applying categories: …" gated by settings.app.print_rule_dispatch.
- ui(templates): add `templates/partials/result_text.html` mirroring the forms table; renders page-level records and their matched rules.
- ui(controller): wire `analyse_text()` into scan path and expose `payload["suspicious_text"]`.
- rules(text): add `identity_verification_prompt`, `gated_document_access`, `email_collection_prompt`; broaden `credential_reset`.

fix: text indicators were not displayed due to missing analyzer and mismatched result shape.

Result shape:

suspicious_text: [
  {
    "type": "page",
    "content_snippet": "...matched phrases…",
    "rules": [
      {"name": "...", "description": "...", "severity": "medium", "tags": ["..."]}
    ]
  }
]
2025-08-22 17:18:50 -05:00
parent af253c858c
commit 55cd81aec0
13 changed files with 422 additions and 115 deletions
--- a/app/config/suspicious_rules.yaml
+++ b/app/config/suspicious_rules.yaml
@@ -96,39 +96,49 @@
  severity: high
  tags: [credentials, form]

-# --- Text Rules (Social Engineering / BEC) ---
- name: urgent_request
-  description: "Language suggesting urgency (common in phishing/BEC)"
-  category: text
-  type: regex
-  pattern: '\b(urgent|immediately|asap|action\s*required|verify\s*now)\b'
-  severity: medium
-  tags: [bec, urgency]
+# --- Text Rules (Social Engineering / BEC / Lures) ---

- name: account_suspension
-  description: "Threat of account suspension/closure"
+- name: identity_verification_prompt
+  description: "Prompts to verify identity/account/email, often gating access"
  category: text
  type: regex
-  pattern: '\b(account\s*(suspend|closure|close)|verify\s*account)\b'
+  # e.g., "verify your identity", "confirm your email", "validate account"
+  pattern: '\b(verify|confirm|validate)\s+(?:your\s+)?(identity|account|email)\b'
+  flags: [i]
  severity: medium
-  tags: [bec, scare-tactics]
+  tags: [bec, verification, gating]

- name: financial_request
-  description: "Request for gift cards, wire transfer, or money"
+- name: gated_document_access
+  description: "Language gating document access behind an action"
  category: text
  type: regex
-  pattern: '\b(gift\s*card|wire\s*transfer|bank\s*account|bitcoin|crypto|payment\s*required)\b'
-  severity: high
-  tags: [bec, financial]
+  # e.g., "access your secure document", "confidential document", "unlock document" (note: "view document" is not covered by this pattern)
+  pattern: '(secure|confidential)\s+document|access\s+(?:the|your)?\s*document|unlock\s+document'
+  flags: [i]
+  severity: medium
+  tags: [lure, document]
+
+- name: email_collection_prompt
+  description: "Explicit prompt to enter/provide an email address to proceed"
+  category: text
+  type: regex
+  # e.g., "enter your email address", "provide email.", "use your email to continue"; "address" is optional and no whitespace is required after "email"
+  pattern: '\b(enter|provide|use)\s+(?:your\s+)?email(?:\s+address)?\b'
+  flags: [i]
+  severity: low
+  tags: [data-collection, email]

 - name: credential_reset
-  description: "Password reset or credential reset wording"
+  description: "Password/credential reset or login-to-continue wording"
  category: text
  type: regex
-  pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*verify|password\s*expiry)\b'
+  # includes: reset password, update credentials, log in to (verify|view|access), password expiry/expiration/expires
+  pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*(?:verify|view|access)|password\s*(?:expiry|expiration|expires))\b'
+  flags: [i]
  severity: medium
  tags: [bec, credentials]

+
 - name: suspicious_iframe
  description: "Iframe tag present (possible phishing/malvertising/drive-by)"
  category: text