feat: HTTPS auto-normalization; robust TLS intel UI; global rules state; clean logging; preload
- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
- Introduce `URLNormalizer` with punycode support and typo repair
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
- Standardize function-rule return shape to `(bool, dict|None)` across
`form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.)
- Harden `FunctionRuleAdapter`:
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
- `RuleEngine`:
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
- Replace hidden singleton factory with explicit builder + global state:
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
- Unify logging wiring:
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Documented foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
then publish with `set_rules_engine()`. Do not call old singleton factories.
- New/changed modules (high level):
- `app/utils/url_tools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox
Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
This commit is contained in:
133
app/utils/url_tools.py
Normal file
133
app/utils/url_tools.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# app/utils/url_tools.py
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
import requests
|
||||
import idna
|
||||
|
||||
# Reuse existing decorator (import from wherever you defined it)
|
||||
|
||||
from app.utils.settings import singleton_loader
|
||||
|
||||
|
||||
class URLNormalizer:
    """
    Normalize user input into a fully-qualified URL for analysis.

    Behavior:
      - If no scheme is present, prepend https:// by default.
      - Optional quick HTTPS reachability check with fallback to http://.
      - Converts Unicode hostnames to punycode via IDNA.
      - IPv6 literals keep their square brackets when the URL is rebuilt.

    Notes:
      - Keep the first-constructed configuration stable via the singleton factory.
      - Avoids Flask/current_app/threading per your project style.
    """

    def __init__(
        self,
        prefer_https: bool = True,
        fallback_http: bool = False,
        connect_timeout: float = 2.0,
    ):
        """
        Args:
            prefer_https: Use https:// for scheme-less input (otherwise http://).
            fallback_http: When preferring HTTPS, probe the https:// URL first and
                fall back to http:// if the probe fails.
            connect_timeout: Timeout in seconds passed to the reachability probe.
        """
        self.prefer_https = bool(prefer_https)
        self.fallback_http = bool(fallback_http)
        self.connect_timeout = float(connect_timeout)

    def normalize_for_analysis(self, raw_input: str) -> str:
        """
        Convert raw input (URL or domain) into a normalized URL string.

        Args:
            raw_input: A URL or bare domain as typed by the user.

        Returns:
            The URL with a scheme and an ASCII (punycode) hostname.

        Raises:
            ValueError: if input is empty/invalid (including errors raised by
                ``urlparse`` or by an unparsable port).
        """
        if raw_input is None:
            raise ValueError("Empty input")

        text = str(raw_input).strip()
        if not text:
            raise ValueError("Empty input")

        # Repair common typos (missing colon)
        lower = text.lower()
        if lower.startswith("http//"):
            text = "http://" + text[6:]
        elif lower.startswith("https//"):
            text = "https://" + text[7:]

        # Respect an existing scheme. Only a *leading* scheme counts: a bare
        # domain whose query string embeds another URL still needs one added.
        if self._has_scheme(text):
            return self._recompose_with_punycode_host(urlparse(text))

        # No scheme -> build one
        if not self.prefer_https:
            return self._recompose_with_punycode_host(urlparse("http://" + text))

        https_url = "https://" + text
        if self.fallback_http and not self._quick_https_ok(https_url):
            return self._recompose_with_punycode_host(urlparse("http://" + text))
        return self._recompose_with_punycode_host(urlparse(https_url))

    @staticmethod
    def _has_scheme(text: str) -> bool:
        """Return True when *text* starts with ``<scheme>://`` (RFC 3986 scheme syntax)."""
        scheme, sep, _rest = text.partition("://")
        if not sep or not scheme:
            return False
        if not (scheme.isascii() and scheme[0].isalpha()):
            return False
        return all(ch.isalnum() or ch in "+-." for ch in scheme)

    def _recompose_with_punycode_host(self, parsed):
        """
        Recompose a parsed URL with hostname encoded to ASCII (punycode).

        Preserves userinfo, port, path, params, query, fragment.

        Raises:
            ValueError: if the port in the URL is not a valid integer
                (propagated from ``parsed.port``).
        """
        host = parsed.hostname
        if host is None:
            return urlunparse(parsed)

        if ":" in host:
            # ``hostname`` strips the brackets from IPv6 literals; put them back
            # so the port separator stays unambiguous. IDNA does not apply here.
            ascii_host = f"[{host}]"
        else:
            try:
                ascii_host = idna.encode(host).decode("ascii")
            except Exception:
                # Best effort: keep the host as given when it cannot be encoded.
                ascii_host = host

        # rebuild netloc (auth + port)
        netloc = ascii_host
        port = parsed.port
        if port is not None:  # keep an explicit port 0 rather than dropping it
            netloc = f"{netloc}:{port}"

        # Carry the userinfo over verbatim so empty components are not lost.
        userinfo, at_sign, _hostport = parsed.netloc.rpartition("@")
        if at_sign:
            netloc = f"{userinfo}@{netloc}"

        return urlunparse((
            parsed.scheme,
            netloc,
            parsed.path or "",
            parsed.params or "",
            parsed.query or "",
            parsed.fragment or "",
        ))

    def _quick_https_ok(self, https_url: str) -> bool:
        """
        Quick reachability check for https:// using a HEAD request.

        Redirects allowed; TLS verify disabled — posture-only.

        Returns:
            True if the request completed with any HTTP status, False on any error.
        """
        try:
            requests.head(
                https_url,
                allow_redirects=True,
                timeout=self.connect_timeout,
                verify=False,
            )
        except Exception:
            # Any failure simply means "not reachable over HTTPS" for the caller.
            return False
        return True
|
||||
|
||||
|
||||
# ---- Singleton factory using our decorator ----
|
||||
@singleton_loader
def get_url_normalizer(
    prefer_https: bool = True,
    fallback_http: bool = False,
    connect_timeout: float = 2.0,
) -> URLNormalizer:
    """
    Build the shared URLNormalizer (cached by ``@singleton_loader``).

    Args:
        prefer_https: Forwarded to ``URLNormalizer``.
        fallback_http: Forwarded to ``URLNormalizer``.
        connect_timeout: Forwarded to ``URLNormalizer``.

    IMPORTANT: With this decorator, the FIRST call's arguments "win".
    Later calls return the cached instance and ignore new arguments.
    """
    normalizer = URLNormalizer(prefer_https, fallback_http, connect_timeout)
    return normalizer
|
||||
Reference in New Issue
Block a user