feat(text): add text analysis pipeline & surface results in UI
- engine: add analyze_text() to extract visible page text and evaluate
category="text" rules; collect matched phrases and expose as
`content_snippet` (deduped, length-capped via settings.ui.snippet_preview_len).
- engine: removed unused code
- browser: removed double call for enrichment
- engine: improve regex compilation — honor per-rule flags (string or list)
and default IGNORECASE when category=="text".
- engine: add dispatch logging "[engine] applying categories: …" gated by
settings.app.log_rule_dispatch.
- ui(templates): add `templates/partials/result_text.html` mirroring the forms
table; renders page-level records and their matched rules.
- ui(controller): wire `analyze_text()` into scan path and expose
`payload["suspicious_text"]`.
- rules(text): add `identity_verification_prompt`, `gated_document_access`,
`email_collection_prompt`; broaden `credential_reset`; remove `urgent_request`,
`account_suspension`, and `financial_request`.
fix: text indicators were not displayed due to missing analyzer and mismatched result shape.
Result shape:
suspicious_text: [
{
"type": "page",
"content_snippet": "...matched phrases…",
"rules": [
{"name": "...", "description": "...", "severity": "medium", "tags": ["..."]}
]
}
]
This commit is contained in:
@@ -4,10 +4,10 @@ from pathlib import Path
|
|||||||
from flask import Flask
|
from flask import Flask
|
||||||
|
|
||||||
# Local imports
|
# Local imports
|
||||||
from .utils.settings import get_settings
|
from app.utils.settings import get_settings
|
||||||
from .logging_setup import wire_logging_once, get_app_logger, get_engine_logger
|
from app.logging_setup import wire_logging_once, get_app_logger
|
||||||
|
|
||||||
from app.blueprints.ui import bp as main_bp # ui blueprint
|
from app.blueprints.main import bp as main_bp # ui blueprint
|
||||||
from app.blueprints.api import api_bp as api_bp # api blueprint
|
from app.blueprints.api import api_bp as api_bp # api blueprint
|
||||||
from app.blueprints.roadmap import bp as roadmap_bp # roadmap
|
from app.blueprints.roadmap import bp as roadmap_bp # roadmap
|
||||||
|
|
||||||
|
|||||||
@@ -134,15 +134,6 @@ def analyze():
|
|||||||
app_logger.error(f"Analysis failed for {url}: {e}")
|
app_logger.error(f"Analysis failed for {url}: {e}")
|
||||||
return redirect(url_for("main.index"))
|
return redirect(url_for("main.index"))
|
||||||
|
|
||||||
# Add enrichment safely
|
|
||||||
try:
|
|
||||||
enrichment = enrich_url(url)
|
|
||||||
result["enrichment"] = enrichment
|
|
||||||
app_logger.info(f"[+] Enrichment added for {url}")
|
|
||||||
except Exception as e:
|
|
||||||
result["enrichment"] = {}
|
|
||||||
app_logger.warning(f"[!] Enrichment failed for {url}: {e}")
|
|
||||||
|
|
||||||
# Redirect to permalink page for this run
|
# Redirect to permalink page for this run
|
||||||
return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
|
return redirect(url_for("main.view_result", run_uuid=result["uuid"]))
|
||||||
|
|
||||||
@@ -2,7 +2,15 @@ app:
|
|||||||
name: SneakyScope
|
name: SneakyScope
|
||||||
version_major: 0
|
version_major: 0
|
||||||
version_minor: 1
|
version_minor: 1
|
||||||
print_rule_loads: True
|
|
||||||
|
# logs when rules are loaded
|
||||||
|
log_rule_loads: False
|
||||||
|
|
||||||
|
# logs each category of rule ran
|
||||||
|
log_rule_dispatch: False
|
||||||
|
|
||||||
|
# logs rule pass/fail per rule
|
||||||
|
log_rule_debug: False
|
||||||
|
|
||||||
cache:
|
cache:
|
||||||
recent_runs_count: 10
|
recent_runs_count: 10
|
||||||
|
|||||||
@@ -96,39 +96,49 @@
|
|||||||
severity: high
|
severity: high
|
||||||
tags: [credentials, form]
|
tags: [credentials, form]
|
||||||
|
|
||||||
# --- Text Rules (Social Engineering / BEC) ---
|
# --- Text Rules (Social Engineering / BEC / Lures) ---
|
||||||
- name: urgent_request
|
|
||||||
description: "Language suggesting urgency (common in phishing/BEC)"
|
|
||||||
category: text
|
|
||||||
type: regex
|
|
||||||
pattern: '\b(urgent|immediately|asap|action\s*required|verify\s*now)\b'
|
|
||||||
severity: medium
|
|
||||||
tags: [bec, urgency]
|
|
||||||
|
|
||||||
- name: account_suspension
|
- name: identity_verification_prompt
|
||||||
description: "Threat of account suspension/closure"
|
description: "Prompts to verify identity/account/email, often gating access"
|
||||||
category: text
|
category: text
|
||||||
type: regex
|
type: regex
|
||||||
pattern: '\b(account\s*(suspend|closure|close)|verify\s*account)\b'
|
# e.g., "verify your identity", "confirm your email", "validate account"
|
||||||
|
pattern: '\b(verify|confirm|validate)\s+(?:your\s+)?(identity|account|email)\b'
|
||||||
|
flags: [i]
|
||||||
severity: medium
|
severity: medium
|
||||||
tags: [bec, scare-tactics]
|
tags: [bec, verification, gating]
|
||||||
|
|
||||||
- name: financial_request
|
- name: gated_document_access
|
||||||
description: "Request for gift cards, wire transfer, or money"
|
description: "Language gating document access behind an action"
|
||||||
category: text
|
category: text
|
||||||
type: regex
|
type: regex
|
||||||
pattern: '\b(gift\s*card|wire\s*transfer|bank\s*account|bitcoin|crypto|payment\s*required)\b'
|
# e.g., "secure document", "confidential document", "access your document", "unlock document"
|
||||||
severity: high
|
pattern: '(secure|confidential)\s+document|access\s+(?:the|your)?\s*document|unlock\s+document'
|
||||||
tags: [bec, financial]
|
flags: [i]
|
||||||
|
severity: medium
|
||||||
|
tags: [lure, document]
|
||||||
|
|
||||||
|
- name: email_collection_prompt
|
||||||
|
description: "Explicit prompt to enter/provide an email address to proceed"
|
||||||
|
category: text
|
||||||
|
type: regex
|
||||||
|
# e.g., "enter your email address", "provide email", "use your email to continue"
|
||||||
|
pattern: '\b(enter|provide|use)\s+(?:your\s+)?email\s+(?:address)?\b'
|
||||||
|
flags: [i]
|
||||||
|
severity: low
|
||||||
|
tags: [data-collection, email]
|
||||||
|
|
||||||
- name: credential_reset
|
- name: credential_reset
|
||||||
description: "Password reset or credential reset wording"
|
description: "Password/credential reset or login-to-continue wording"
|
||||||
category: text
|
category: text
|
||||||
type: regex
|
type: regex
|
||||||
pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*verify|password\s*expiry)\b'
|
# includes: reset password, update credentials, log in to (verify|view|access), password expiry/expiration/expires
|
||||||
|
pattern: '\b(reset\s*password|update\s*credentials|log\s*in\s*to\s*(?:verify|view|access)|password\s*(?:expiry|expiration|expires))\b'
|
||||||
|
flags: [i]
|
||||||
severity: medium
|
severity: medium
|
||||||
tags: [bec, credentials]
|
tags: [bec, credentials]
|
||||||
|
|
||||||
|
|
||||||
- name: suspicious_iframe
|
- name: suspicious_iframe
|
||||||
description: "Iframe tag present (possible phishing/malvertising/drive-by)"
|
description: "Iframe tag present (possible phishing/malvertising/drive-by)"
|
||||||
category: text
|
category: text
|
||||||
|
|||||||
@@ -3,7 +3,8 @@ rules_engine.py
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import logging
|
import unicodedata
|
||||||
|
from collections import Counter
|
||||||
from dataclasses import dataclass, asdict, field
|
from dataclasses import dataclass, asdict, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Dict, List, Optional, Tuple, Union
|
from typing import Callable, Dict, List, Optional, Tuple, Union
|
||||||
@@ -11,6 +12,18 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
|
|||||||
from app.logging_setup import get_engine_logger
|
from app.logging_setup import get_engine_logger
|
||||||
from app.utils.settings import get_settings
|
from app.utils.settings import get_settings
|
||||||
|
|
||||||
|
import re as _re
|
||||||
|
|
||||||
|
FLAG_MAP = {
|
||||||
|
"i": _re.IGNORECASE, "ignorecase": _re.IGNORECASE,
|
||||||
|
"m": _re.MULTILINE, "multiline": _re.MULTILINE,
|
||||||
|
"s": _re.DOTALL, "dotall": _re.DOTALL, "singleline": _re.DOTALL,
|
||||||
|
"x": _re.VERBOSE, "verbose": _re.VERBOSE,
|
||||||
|
"a": _re.ASCII, "ascii": _re.ASCII,
|
||||||
|
"u": _re.UNICODE, "unicode": _re.UNICODE,
|
||||||
|
"l": _re.LOCALE, "locale": _re.LOCALE,
|
||||||
|
}
|
||||||
|
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
@@ -49,21 +62,65 @@ class Rule:
|
|||||||
"""
|
"""
|
||||||
Compile the regex pattern once for performance, if applicable.
|
Compile the regex pattern once for performance, if applicable.
|
||||||
|
|
||||||
|
Behavior:
|
||||||
|
- Uses flags specified on the rule (list like ['i','m'] or a string like 'im').
|
||||||
|
- If the rule category is 'text' and no 'i' flag is set, defaults to IGNORECASE.
|
||||||
|
- Stores the compiled object on self._compiled_regex.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if the regex is compiled and ready, False otherwise.
|
bool: True if the regex is compiled and ready, False otherwise.
|
||||||
"""
|
"""
|
||||||
|
if getattr(self, "rule_type", None) != "regex" or not getattr(self, "pattern", None):
|
||||||
|
return False
|
||||||
|
|
||||||
|
re_flags = 0
|
||||||
|
|
||||||
|
# Collect flags from the rule, if any (supports "ims" or ["i","m","s"])
|
||||||
|
raw_flags = getattr(self, "flags", None)
|
||||||
|
if isinstance(raw_flags, str):
|
||||||
|
for ch in raw_flags:
|
||||||
|
mapped = FLAG_MAP.get(ch.lower())
|
||||||
|
if mapped is not None:
|
||||||
|
re_flags |= mapped
|
||||||
|
else:
|
||||||
|
logger.warning("[Rule] Unknown regex flag %r on rule '%s'", ch, getattr(self, "name", "?"))
|
||||||
|
elif isinstance(raw_flags, (list, tuple, set)):
|
||||||
|
for fl in raw_flags:
|
||||||
|
key = str(fl).lower()
|
||||||
|
mapped = FLAG_MAP.get(key)
|
||||||
|
if mapped is not None:
|
||||||
|
re_flags |= mapped
|
||||||
|
else:
|
||||||
|
logger.warning("[Rule] Unknown regex flag %r on rule '%s'", fl, getattr(self, "name", "?"))
|
||||||
|
|
||||||
|
# Default IGNORECASE for text rules if not explicitly provided
|
||||||
|
cat = (getattr(self, "category", "") or "").lower().strip()
|
||||||
|
if cat == "text" and not (re_flags & _re.IGNORECASE):
|
||||||
|
re_flags |= _re.IGNORECASE
|
||||||
|
|
||||||
if self.rule_type == "regex" and self.pattern:
|
|
||||||
try:
|
try:
|
||||||
self._compiled_regex = re.compile(self.pattern, re.IGNORECASE)
|
self._compiled_regex = _re.compile(self.pattern, re_flags)
|
||||||
logger.debug(f"[Rule] Compiled regex for '{self.name}'")
|
|
||||||
|
# Build a compact flag summary inline (e.g., 'ims' or '-' if none)
|
||||||
|
flag_parts = []
|
||||||
|
if re_flags & _re.IGNORECASE: flag_parts.append("i")
|
||||||
|
if re_flags & _re.MULTILINE: flag_parts.append("m")
|
||||||
|
if re_flags & _re.DOTALL: flag_parts.append("s")
|
||||||
|
if re_flags & _re.VERBOSE: flag_parts.append("x")
|
||||||
|
if re_flags & _re.ASCII: flag_parts.append("a")
|
||||||
|
if re_flags & _re.UNICODE: flag_parts.append("u")
|
||||||
|
if re_flags & _re.LOCALE: flag_parts.append("l")
|
||||||
|
flag_summary = "".join(flag_parts) if flag_parts else "-"
|
||||||
|
|
||||||
|
logger.info("[Rule] Compiled regex for '%s' (flags=%s)", getattr(self, "name", "?"), flag_summary)
|
||||||
return True
|
return True
|
||||||
except re.error as rex:
|
|
||||||
|
except _re.error as rex:
|
||||||
self._compiled_regex = None
|
self._compiled_regex = None
|
||||||
logger.warning(f"[Rule] Failed to compile regex for '{self.name}': {rex}")
|
logger.warning("[Rule] Failed to compile regex for '%s': %s", getattr(self, "name", "?"), rex)
|
||||||
return False
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def run(self, text: str) -> Tuple[bool, str]:
|
def run(self, text: str) -> Tuple[bool, str]:
|
||||||
"""
|
"""
|
||||||
Run the rule on the given text.
|
Run the rule on the given text.
|
||||||
@@ -198,7 +255,7 @@ class RuleEngine:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
if settings.app.print_rule_loads:
|
if settings.app.log_rule_loads:
|
||||||
logger.info(
|
logger.info(
|
||||||
"[engine] add_rule: %s/%s replace=%s -> count=%d",
|
"[engine] add_rule: %s/%s replace=%s -> count=%d",
|
||||||
rule.category, rule.name, bool(replace), len(self._rules)
|
rule.category, rule.name, bool(replace), len(self._rules)
|
||||||
@@ -230,6 +287,14 @@ class RuleEngine:
|
|||||||
self.add_rule(rules[i], replace=replace)
|
self.add_rule(rules[i], replace=replace)
|
||||||
i = i + 1
|
i = i + 1
|
||||||
|
|
||||||
|
def _normalize_for_text_rules(self, s: str) -> str:
|
||||||
|
if not s:
|
||||||
|
return ""
|
||||||
|
s = unicodedata.normalize("NFKC", s)
|
||||||
|
# collapse whitespace; keeps word boundaries sensible
|
||||||
|
s = _re.sub(r"\s+", " ", s).strip()
|
||||||
|
return s
|
||||||
|
|
||||||
def run_all(self, text: str, category: Optional[str] = None) -> List[Dict]:
|
def run_all(self, text: str, category: Optional[str] = None) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
Run all rules against text.
|
Run all rules against text.
|
||||||
@@ -241,6 +306,30 @@ class RuleEngine:
|
|||||||
Returns:
|
Returns:
|
||||||
List of dicts with PASS/FAIL per rule (JSON-serializable).
|
List of dicts with PASS/FAIL per rule (JSON-serializable).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# --- dispatch visibility --- if set to true, we log applied categories
|
||||||
|
if getattr(settings.app, "log_rule_dispatch", False):
|
||||||
|
all_cats = [r.category for r in self._rules]
|
||||||
|
cat_counts = Counter(all_cats)
|
||||||
|
# Which categories are being applied this run?
|
||||||
|
if category is None:
|
||||||
|
selected_categories = sorted(cat_counts.keys())
|
||||||
|
else:
|
||||||
|
selected_categories = [category]
|
||||||
|
|
||||||
|
# How many rules match the selection?
|
||||||
|
selected_rule_count = sum(1 for r in self._rules if r.category in selected_categories)
|
||||||
|
try:
|
||||||
|
logger.info(
|
||||||
|
"[engine] applying categories: %s | selected_rules=%d | totals=%s",
|
||||||
|
",".join(selected_categories),
|
||||||
|
selected_rule_count,
|
||||||
|
dict(cat_counts),
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# --- end dispatch visibility ---
|
||||||
|
|
||||||
results: List[Dict] = []
|
results: List[Dict] = []
|
||||||
|
|
||||||
index = 0
|
index = 0
|
||||||
@@ -248,12 +337,20 @@ class RuleEngine:
|
|||||||
while index < total:
|
while index < total:
|
||||||
rule = self.rules[index]
|
rule = self.rules[index]
|
||||||
|
|
||||||
|
# if we are running a text rule, let's normalize the text.
|
||||||
|
if category == "text":
|
||||||
|
text = self._normalize_for_text_rules(text)
|
||||||
|
|
||||||
if category is not None and rule.category != category:
|
if category is not None and rule.category != category:
|
||||||
index = index + 1
|
index = index + 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
matched, reason = rule.run(text)
|
matched, reason = rule.run(text)
|
||||||
|
|
||||||
|
# very fine-grained trace per rule:
|
||||||
|
if getattr(settings.app, "log_rule_debug", False):
|
||||||
|
logger.info(f"[engine] eval: cat:{rule.category} - rule:{rule.name} - result: {matched} - reason:{reason}" )
|
||||||
|
|
||||||
result_str = "FAIL" if matched else "PASS"
|
result_str = "FAIL" if matched else "PASS"
|
||||||
reason_to_include: Optional[str]
|
reason_to_include: Optional[str]
|
||||||
if matched:
|
if matched:
|
||||||
|
|||||||
@@ -54,5 +54,5 @@
|
|||||||
<p class="text-sm text-gray-500">No enrichment data available.</p>
|
<p class="text-sm text-gray-500">No enrichment data available.</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</section>
|
</section>
|
||||||
@@ -109,5 +109,5 @@
|
|||||||
<p class="text-sm text-gray-500">No form issues detected.</p>
|
<p class="text-sm text-gray-500">No form issues detected.</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</section>
|
</section>
|
||||||
@@ -116,5 +116,5 @@
|
|||||||
<p class="text-sm text-gray-500">No suspicious scripts detected.</p>
|
<p class="text-sm text-gray-500">No suspicious scripts detected.</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</section>
|
</section>
|
||||||
@@ -193,7 +193,7 @@
|
|||||||
</details>
|
</details>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
{% endmacro %}
|
{% endmacro %}
|
||||||
|
|||||||
120
app/templates/partials/result_text.html
Normal file
120
app/templates/partials/result_text.html
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
<!-- /templates/partials/result_text.html -->
|
||||||
|
<section id="sus_text" class="card">
|
||||||
|
<h2 class="text-lg font-semibold mb-3">Text</h2>
|
||||||
|
|
||||||
|
{% if suspicious_text and suspicious_text|length > 0 %}
|
||||||
|
<div class="overflow-x-auto">
|
||||||
|
<table class="w-full table-fixed text-sm"> <!-- matches forms table style -->
|
||||||
|
<colgroup>
|
||||||
|
<col class="w-[10%]"> <!-- Source -->
|
||||||
|
<col class="w-[10%]"> <!-- Indicators -->
|
||||||
|
<col class="w-[15%]"> <!-- Tags -->
|
||||||
|
<col class="w-[45%]"> <!-- Matches (Rules) -->
|
||||||
|
<col class="w-[25%]"> <!-- Text Snippet -->
|
||||||
|
</colgroup>
|
||||||
|
<thead class="text-gray-400 border-b border-gray-800">
|
||||||
|
<tr>
|
||||||
|
<th class="text-left py-2 pr-4 whitespace-normal break-words">Source</th>
|
||||||
|
<th class="text-left py-2 pr-4 whitespace-normal break-words">Indicators</th>
|
||||||
|
<th class="text-left py-2 pr-4 whitespace-normal break-words">Tags</th>
|
||||||
|
<th class="text-left py-2 pr-4 whitespace-normal break-words">Matches (Rules)</th>
|
||||||
|
<th class="text-left py-2 pr-4 whitespace-normal break-words">Text Snippet</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for rec in suspicious_text %}
|
||||||
|
<tr class="border-b border-gray-900 align-top">
|
||||||
|
<!-- Source -->
|
||||||
|
<td class="py-2 pr-4 break-words">
|
||||||
|
{{ (rec.type or 'page')|title }}
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<!-- Indicators (count of rules matched) -->
|
||||||
|
<td class="py-2 pr-4 whitespace-nowrap">
|
||||||
|
{{ rec.rules|length if rec.rules else 0 }}
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<!-- Tags (unique across rules) -->
|
||||||
|
<td class="py-2 pr-4 break-words">
|
||||||
|
{% set ns = namespace(tags=[]) %}
|
||||||
|
{% if rec.rules %}
|
||||||
|
{% for r in rec.rules %}
|
||||||
|
{% if r.tags %}
|
||||||
|
{% for t in r.tags %}
|
||||||
|
{% if t not in ns.tags %}
|
||||||
|
{% set ns.tags = ns.tags + [t] %}
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{% if ns.tags and ns.tags|length > 0 %}
|
||||||
|
<div class="flex flex-wrap gap-1">
|
||||||
|
{% for t in ns.tags %}
|
||||||
|
<span class="chip" title="Tag: {{ t }}">{{ t }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<span class="chip">None</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<!-- Matches (Rules) -->
|
||||||
|
<td class="py-2 pr-4 break-words">
|
||||||
|
{% if rec.rules and rec.rules|length > 0 %}
|
||||||
|
<ul class="space-y-1">
|
||||||
|
{% for r in rec.rules %}
|
||||||
|
<li title="{{ r.description or '' }}">
|
||||||
|
{{ r.name }}
|
||||||
|
{% if r.severity %}
|
||||||
|
{% set sev = r.severity|lower %}
|
||||||
|
<span class="ml-2 rounded-full px-2 py-0.5 text-xs border
|
||||||
|
{% if sev == 'high' %} badge badge-danger
|
||||||
|
{% elif sev == 'medium' %} badge badge-warn
|
||||||
|
{% else %} badge badge-info {% endif %}">
|
||||||
|
{{ r.severity|title }}
|
||||||
|
</span>
|
||||||
|
{% endif %}
|
||||||
|
{% if r.tags %}
|
||||||
|
{% for t in r.tags %}
|
||||||
|
<span class="chip" title="Tag: {{ t }}">{{ t }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{% if r.description %}
|
||||||
|
<small class="text-gray-400"> — {{ r.description }}</small>
|
||||||
|
{% endif %}
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% else %}
|
||||||
|
<span class="text-gray-500">N/A</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<!-- Text Snippet (matched phrases; let column width control it) -->
|
||||||
|
<td class="py-2 pr-4 align-top">
|
||||||
|
{% if rec.content_snippet %}
|
||||||
|
<details>
|
||||||
|
<summary class="cursor-pointer text-blue-300 hover:underline">
|
||||||
|
View snippet ({{ rec.content_snippet|length }} chars)
|
||||||
|
</summary>
|
||||||
|
<pre class="mt-1 bg-[#0b0f14] border border-gray-800 rounded-lg p-3
|
||||||
|
w-full max-w-full overflow-auto max-h-64
|
||||||
|
whitespace-pre-wrap break-words font-mono text-xs">{{ rec.content_snippet }}</pre>
|
||||||
|
</details>
|
||||||
|
{% else %}
|
||||||
|
<span class="text-gray-500">N/A</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% else %}
|
||||||
|
<p class="text-sm text-gray-500">No text issues detected.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
|
</section>
|
||||||
@@ -15,6 +15,7 @@
|
|||||||
<a href="#redirects" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Redirects</a>
|
<a href="#redirects" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Redirects</a>
|
||||||
<a href="#forms" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Forms</a>
|
<a href="#forms" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Forms</a>
|
||||||
<a href="#scripts" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Suspicious Scripts</a>
|
<a href="#scripts" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Suspicious Scripts</a>
|
||||||
|
<a href="#sus_text" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Suspicious Text</a>
|
||||||
<a href="#screenshot" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Screenshot</a>
|
<a href="#screenshot" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Screenshot</a>
|
||||||
<a href="#source" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Source</a>
|
<a href="#source" class="px-2 py-1 rounded border border-gray-700 hover:bg-gray-800">Source</a>
|
||||||
</div>
|
</div>
|
||||||
@@ -35,7 +36,7 @@
|
|||||||
{{ request.host_url }}results/{{ uuid }}
|
{{ request.host_url }}results/{{ uuid }}
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
<p><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@@ -73,23 +74,25 @@
|
|||||||
{% else %}
|
{% else %}
|
||||||
<p class="text-sm text-gray-500">No redirects detected.</p>
|
<p class="text-sm text-gray-500">No redirects detected.</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Forms -->
|
<!-- Forms -->
|
||||||
{% include "partials/result_forms.html" %}
|
{% include "partials/result_forms.html" %}
|
||||||
|
|
||||||
|
|
||||||
<!-- Suspicious Scripts -->
|
<!-- Suspicious Scripts -->
|
||||||
{% include "partials/result_scripts.html" %}
|
{% include "partials/result_scripts.html" %}
|
||||||
|
|
||||||
|
<!-- Suspicious Text -->
|
||||||
|
{% include "partials/result_text.html" with context %}
|
||||||
|
|
||||||
<!-- Screenshot -->
|
<!-- Screenshot -->
|
||||||
<section id="screenshot" class="bg-card border border-gray-800 rounded-xl p-4">
|
<section id="screenshot" class="bg-card border border-gray-800 rounded-xl p-4">
|
||||||
<h2 class="text-lg font-semibold mb-3">Screenshot</h2>
|
<h2 class="text-lg font-semibold mb-3">Screenshot</h2>
|
||||||
<img src="{{ url_for('main.artifacts', run_uuid=uuid, filename='screenshot.png') }}"
|
<img src="{{ url_for('main.artifacts', run_uuid=uuid, filename='screenshot.png') }}"
|
||||||
alt="Screenshot"
|
alt="Screenshot"
|
||||||
class="w-full rounded-lg border border-gray-800">
|
class="w-full rounded-lg border border-gray-800">
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Source -->
|
<!-- Source -->
|
||||||
@@ -102,7 +105,7 @@
|
|||||||
View Source
|
View Source
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
<p class="mt-2"><a href="#top-jump-list" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
<p class="mt-2"><a href="#url-overview" class="text-sm text-gray-400 hover:text-blue-400">Back to top</a></p>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ from typing import Any, Dict, List, Optional
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import re
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from playwright.async_api import async_playwright, TimeoutError as PWTimeoutError
|
from playwright.async_api import async_playwright, TimeoutError as PWTimeoutError
|
||||||
|
|
||||||
@@ -85,64 +86,6 @@ class Browser:
|
|||||||
index = index + 1
|
index = index + 1
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
def run_rule_checks(self, text: str, category: str) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Run all rules for a given category against provided text, returning a table-friendly model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
text: Text to analyze (HTML, snippet, etc.)
|
|
||||||
category: One of 'form', 'script', 'text' (or any category your rules use)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
{
|
|
||||||
"checks": [
|
|
||||||
{ "name": str, "description": str, "category": str,
|
|
||||||
"result": "PASS"|"FAIL", "reason": Optional[str],
|
|
||||||
"severity": Optional[str], "tags": Optional[List[str]] }, ...
|
|
||||||
],
|
|
||||||
"summary": { "fail_count": int, "total_rules": int }
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
out: Dict[str, Any] = {"checks": [], "summary": {"fail_count": 0, "total_rules": 0}}
|
|
||||||
engine = self._get_rule_engine()
|
|
||||||
|
|
||||||
if engine is None:
|
|
||||||
return out
|
|
||||||
|
|
||||||
try:
|
|
||||||
engine_results = engine.run_all(text, category=category) # list of dicts
|
|
||||||
index = 0
|
|
||||||
total = len(engine_results)
|
|
||||||
while index < total:
|
|
||||||
item = engine_results[index]
|
|
||||||
normalized = {
|
|
||||||
"name": item.get("name"),
|
|
||||||
"description": item.get("description"),
|
|
||||||
"category": item.get("category"),
|
|
||||||
"result": item.get("result"), # "PASS" | "FAIL"
|
|
||||||
"reason": item.get("reason"), # present on FAIL by engine design
|
|
||||||
"severity": item.get("severity"),
|
|
||||||
"tags": item.get("tags"),
|
|
||||||
}
|
|
||||||
out["checks"].append(normalized)
|
|
||||||
index = index + 1
|
|
||||||
|
|
||||||
out["summary"] = self._summarize_results(out["checks"])
|
|
||||||
except Exception as exc:
|
|
||||||
# Preserve shape; record the error as a synthetic PASS (so UI doesn't break)
|
|
||||||
out["checks"].append({
|
|
||||||
"name": "engine_error",
|
|
||||||
"description": "Rule engine failed during evaluation",
|
|
||||||
"category": category,
|
|
||||||
"result": "PASS",
|
|
||||||
"reason": f"{exc}",
|
|
||||||
"severity": None,
|
|
||||||
"tags": None
|
|
||||||
})
|
|
||||||
out["summary"] = {"fail_count": 0, "total_rules": 1}
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
def build_rule_checks_overview(self, full_html_text: str) -> List[Dict[str, Any]]:
|
def build_rule_checks_overview(self, full_html_text: str) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Build a top-level overview for the results page: runs each category across
|
Build a top-level overview for the results page: runs each category across
|
||||||
@@ -376,6 +319,135 @@ class Browser:
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def analyze_text(self, html: str) -> List[Dict[str, Any]]:
    """
    Extract visible page text and evaluate the engine's ``text`` rules on it.

    Only rules that matched are reported. A failure inside one rule is
    recorded and does not discard matches collected from the other rules.

    Args:
        html: Raw page HTML. Empty/None input yields an empty list.

    Returns:
        A list holding at most one match record, optionally followed by one
        error record.

        Match record (present only when at least one rule matched):
            {
              "type": "page",
              "content_snippet": "<matched words/phrases joined>" or None,
              "rules": [
                  {"name": "...", "description": "...", "severity": "...", "tags": [...]},
                  ...
              ],
            }

        Error record (present only when extraction or a rule failed):
            {"type": "page", "heuristics": ["Text ... error: <message>", ...]}
    """
    results: List[Dict[str, Any]] = []

    # Short-circuit on missing html
    if not html:
        return results

    # Extract visible text (strip scripts/styles)
    try:
        soup = BeautifulSoup(html, "lxml")
        for tag in soup(["script", "style", "noscript", "template"]):
            tag.decompose()
        # Basic hidden cleanup (best-effort)
        for el in soup.select('[hidden], [aria-hidden="true"]'):
            el.decompose()

        text = soup.get_text(separator=" ", strip=True)
        if not text:
            return results

        # Normalize whitespace so regexes behave consistently
        text = re.sub(r"\s+", " ", text).strip()
    except Exception as exc:
        # Same error-record shape as the analysis failure path below
        results.append({
            "type": "page",
            "heuristics": [f"Text extraction error: {exc}"],
        })
        return results

    engine = self._get_rule_engine()
    if engine is None:
        return results

    # How many characters to show for the preview snippet. Fall back to the
    # default when the setting is not a usable length, so a bad config value
    # cannot make the comparison/slice below fail.
    preview_len = getattr(settings.ui, "snippet_preview_len", 200)
    if not isinstance(preview_len, int) or preview_len < 0:
        preview_len = 200

    try:
        text_rules = [
            rule for rule in engine.rules
            if getattr(rule, "category", None) == "text"
        ]
    except Exception as exc:
        results.append({
            "type": "page",
            "heuristics": [f"Text analysis error: {exc}"],
        })
        return results

    matches_for_record: List[Dict[str, Any]] = []
    matched_phrases: List[str] = []  # order-preserving
    seen_phrases = set()  # casefolded keys: text rules match case-insensitively
    errors: List[str] = []
    max_phrases_per_rule = 5  # limit per rule to avoid flooding the snippet

    for rule in text_rules:
        # Isolate each rule: one broken rule must not drop the others' matches
        try:
            rtype = getattr(rule, "rule_type", None)

            if rtype == "regex":
                ok, _reason = rule.run(text)
                if not ok:
                    continue

                # Try to pull matched words/phrases
                compiled = getattr(rule, "_compiled_regex", None)
                pattern = getattr(rule, "pattern", None)
                if compiled is None and pattern:
                    try:
                        compiled = re.compile(pattern, re.IGNORECASE)
                    except (re.error, TypeError):
                        # The rule still matched; just skip phrase extraction
                        compiled = None

                # Collect a few (deduped) matched phrases
                if compiled is not None:
                    per_rule_count = 0
                    for m in compiled.finditer(text):
                        phrase = m.group(0).strip()
                        key = phrase.casefold()
                        if phrase and key not in seen_phrases:
                            matched_phrases.append(phrase)
                            seen_phrases.add(key)
                            per_rule_count += 1
                            if per_rule_count >= max_phrases_per_rule:
                                break

                matches_for_record.append({
                    "name": getattr(rule, "name", "unknown_rule"),
                    "description": getattr(rule, "description", "") or "",
                    "severity": getattr(rule, "severity", None),
                    "tags": getattr(rule, "tags", None),
                })

            elif rtype == "function":
                # Optional: function-style rules can inspect the full text
                facts = {"text": text, "category": "text"}
                ok, reason = rule.run(facts)
                if ok:
                    matches_for_record.append({
                        "name": getattr(rule, "name", "unknown_rule"),
                        "description": (reason or "") or getattr(rule, "description", ""),
                        "severity": getattr(rule, "severity", None),
                        "tags": getattr(rule, "tags", None),
                    })
        except Exception as exc:
            errors.append(f"Text analysis error: {exc}")

    if matches_for_record:
        # Build the snippet from matched words/phrases
        joined = " … ".join(matched_phrases)
        if len(joined) > preview_len:
            joined = joined[:preview_len] + "…"

        results.append({
            "type": "page",
            "content_snippet": joined or None,
            "rules": matches_for_record,
        })

    if errors:
        results.append({
            "type": "page",
            "heuristics": errors,
        })

    return results
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
# Fetcher / Orchestrator
|
# Fetcher / Orchestrator
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
@@ -458,12 +530,15 @@ class Browser:
|
|||||||
# Read back saved source
|
# Read back saved source
|
||||||
html_content = source_path.read_text(encoding="utf-8")
|
html_content = source_path.read_text(encoding="utf-8")
|
||||||
|
|
||||||
# Forms analysis (per-form rule checks)
|
# Forms analysis
|
||||||
forms_info = self.analyze_forms(html_content, final_url)
|
forms_info = self.analyze_forms(html_content, final_url)
|
||||||
|
|
||||||
# Scripts artifacts (no detection here)
|
# Scripts artifacts
|
||||||
suspicious_scripts = self.analyze_scripts(html_content, base_url=final_url)
|
suspicious_scripts = self.analyze_scripts(html_content, base_url=final_url)
|
||||||
|
|
||||||
|
# suspicious text
|
||||||
|
flagged_text = self.analyze_text(html_content)
|
||||||
|
|
||||||
# Enrichment
|
# Enrichment
|
||||||
enrichment = enrich_url(url, fetch_ssl_enabled)
|
enrichment = enrich_url(url, fetch_ssl_enabled)
|
||||||
|
|
||||||
@@ -486,7 +561,8 @@ class Browser:
|
|||||||
"scripts": scripts_seen,
|
"scripts": scripts_seen,
|
||||||
"forms": forms_info,
|
"forms": forms_info,
|
||||||
"suspicious_scripts": suspicious_scripts,
|
"suspicious_scripts": suspicious_scripts,
|
||||||
"rule_checks": rule_checks_overview, # table-ready for UI
|
"suspicious_text":flagged_text,
|
||||||
|
"rule_checks": rule_checks_overview,
|
||||||
"enrichment": enrichment
|
"enrichment": enrichment
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -63,7 +63,9 @@ class AppConfig:
|
|||||||
name: str = "MyApp"
|
name: str = "MyApp"
|
||||||
version_major: int = 1
|
version_major: int = 1
|
||||||
version_minor: int = 0
|
version_minor: int = 0
|
||||||
print_rule_loads: bool = False
|
log_rule_loads: bool = False
|
||||||
|
log_rule_dispatch: bool = False
|
||||||
|
log_rule_debug: bool = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
Reference in New Issue
Block a user