feat(text): add text analysis pipeline & surface results in UI
- engine: add analyse_text() to extract visible page text and evaluate
category="text" rules; collect matched phrases and expose as
`content_snippet` (deduped, length-capped via settings.ui.snippet_preview_len).
- engine: remove unused code
- browser: remove duplicate enrichment call
- engine: improve regex compilation — honor per-rule flags (string or list)
and default to IGNORECASE when category=="text".
- engine: add dispatch logging "[engine] applying categories: …" gated by
settings.app.print_rule_dispatch.
- ui(templates): add `templates/partials/result_text.html` mirroring the forms
table; renders page-level records and their matched rules.
- ui(controller): wire `analyse_text()` into scan path and expose
`payload["suspicious_text"]`.
- rules(text): add `identity_verification_prompt`, `gated_document_access`,
`email_collection_prompt`; broaden `credential_reset`.
fix: text indicators were not displayed due to missing analyzer and mismatched result shape.
Result shape:
suspicious_text: [
{
"type": "page",
"content_snippet": "...matched phrases...",
"rules": [
{"name": "...", "description": "...", "severity": "medium", "tags": ["..."]}
]
}
]
This commit is contained in:
@@ -96,39 +96,49 @@
|
||||
severity: high
|
||||
tags: [credentials, form]
|
||||
|
||||
# --- Text Rules (Social Engineering / BEC) ---
|
||||
- name: urgent_request
|
||||
description: "Language suggesting urgency (common in phishing/BEC)"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: '\b(urgent|immediately|asap|action\s*required|verify\s*now)\b'
|
||||
severity: medium
|
||||
tags: [bec, urgency]
|
||||
# --- Text Rules (Social Engineering / BEC / Lures) ---
|
||||
|
||||
- name: account_suspension
|
||||
description: "Threat of account suspension/closure"
|
||||
- name: identity_verification_prompt
|
||||
description: "Prompts to verify identity/account/email, often gating access"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: '\b(account\s*(suspend|closure|close)|verify\s*account)\b'
|
||||
# e.g., "verify your identity", "confirm your email", "validate account"
|
||||
pattern: '\b(verify|confirm|validate)\s+(?:your\s+)?(identity|account|email)\b'
|
||||
flags: [i]
|
||||
severity: medium
|
||||
tags: [bec, scare-tactics]
|
||||
tags: [bec, verification, gating]
|
||||
|
||||
- name: financial_request
|
||||
description: "Request for gift cards, wire transfer, or money"
|
||||
- name: gated_document_access
|
||||
description: "Language gating document access behind an action"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: '\b(gift\s*card|wire\s*transfer|bank\s*account|bitcoin|crypto|payment\s*required)\b'
|
||||
severity: high
|
||||
tags: [bec, financial]
|
||||
# e.g., "secure document", "confidential document", "access your document", "unlock document"
|
||||
pattern: '(secure|confidential)\s+document|access\s+(?:the|your)?\s*document|unlock\s+document'
|
||||
flags: [i]
|
||||
severity: medium
|
||||
tags: [lure, document]
|
||||
|
||||
- name: email_collection_prompt
|
||||
description: "Explicit prompt to enter/provide an email address to proceed"
|
||||
category: text
|
||||
type: regex
|
||||
# e.g., "enter your email address", "provide email", "use your email to continue"
|
||||
pattern: '\b(enter|provide|use)\s+(?:your\s+)?email\s+(?:address)?\b'
|
||||
flags: [i]
|
||||
severity: low
|
||||
tags: [data-collection, email]
|
||||
|
||||
- name: credential_reset
|
||||
description: "Password reset or credential reset wording"
|
||||
description: "Password/credential reset or login-to-continue wording"
|
||||
category: text
|
||||
type: regex
|
||||
pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*verify|password\s*expiry)\b'
|
||||
# includes: reset password, update credentials, log in to (verify|view|access), password expiry/expiration/expires
|
||||
pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*(?:verify|view|access)|password\s*(?:expiry|expiration|expires))\b'
|
||||
flags: [i]
|
||||
severity: medium
|
||||
tags: [bec, credentials]
|
||||
|
||||
|
||||
- name: suspicious_iframe
|
||||
description: "Iframe tag present (possible phishing/malvertising/drive-by)"
|
||||
category: text
|
||||
|
||||
Reference in New Issue
Block a user